DPDK patches and discussions
* [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF
@ 2021-06-01  1:40 Ting Xu
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 1/5] common/iavf: add support for ETS-based Tx QoS Ting Xu
                   ` (9 more replies)
  0 siblings, 10 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-01  1:40 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch set enables ETS-based Tx QoS for VF in DCF. A kernel tool is
used to configure ETS first. DCF is used to set bandwidth limits for the
VFs of each TC. IAVF can then query the QoS capability and set the queue
TC mapping. The Traffic Management API is used to configure the QoS
hierarchy scheduler tree, which is passed to hardware to enable all of
the above functions.
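
As a concrete illustration, an application on the DCF port could drive
these patches through the generic rte_tm API roughly as below. This is
a minimal sketch, not the definitive usage: build_qos_tree() is a
hypothetical helper, the node ids and rate are arbitrary, and only one
TC is shown.

  #include <string.h>
  #include <rte_tm.h>

  static int
  build_qos_tree(uint16_t port_id, uint16_t num_vfs)
  {
  	struct rte_tm_error err;
  	struct rte_tm_node_params np;
  	struct rte_tm_shaper_params sp;
  	uint32_t root_id = 1000, tc0_id = 1100, vf;

  	/* shaper rates are in bytes per second: 100 Mbps committed */
  	memset(&sp, 0, sizeof(sp));
  	sp.committed.rate = 100 * 1000 * 1000 / 8;
  	if (rte_tm_shaper_profile_add(port_id, 1, &sp, &err))
  		return -1;

  	/* non-leaf nodes: level 0 is the port root, level 1 a TC */
  	memset(&np, 0, sizeof(np));
  	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
  	np.nonleaf.n_sp_priorities = 1;
  	if (rte_tm_node_add(port_id, root_id, RTE_TM_NODE_ID_NULL,
  			    0, 1, 0, &np, &err))
  		return -1;
  	if (rte_tm_node_add(port_id, tc0_id, root_id, 0, 1, 1,
  			    &np, &err))
  		return -1;

  	/* leaf nodes: one VSI per VF on level 2; for TC n the driver
  	 * expects node id n * num_vfs + vf, so TC 0 uses 0..num_vfs-1
  	 */
  	memset(&np, 0, sizeof(np));
  	np.shaper_profile_id = 1;
  	np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
  	for (vf = 0; vf < num_vfs; vf++)
  		if (rte_tm_node_add(port_id, vf, tc0_id, 0, 1, 2,
  				    &np, &err))
  			return -1;

  	return rte_tm_hierarchy_commit(port_id, 1, &err);
  }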

Ting Xu (5):
  common/iavf: add support for ETS-based Tx QoS
  net/ice/base: support DCF query port ETS adminq
  net/ice: support DCF link status event handling
  net/ice: support QoS config VF bandwidth in DCF
  net/iavf: query QoS cap and set queue TC mapping

 drivers/common/iavf/iavf_type.h  |   2 +
 drivers/common/iavf/virtchnl.h   | 117 ++++++
 drivers/net/iavf/iavf.h          |  45 +++
 drivers/net/iavf/iavf_ethdev.c   |  31 ++
 drivers/net/iavf/iavf_tm.c       | 675 +++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c    |  56 ++-
 drivers/net/iavf/meson.build     |   1 +
 drivers/net/ice/base/ice_dcb.c   |   3 +-
 drivers/net/ice/ice_dcf.c        |   6 +-
 drivers/net/ice/ice_dcf.h        |  53 +++
 drivers/net/ice/ice_dcf_ethdev.c |  67 ++-
 drivers/net/ice/ice_dcf_ethdev.h |   3 +
 drivers/net/ice/ice_dcf_parent.c |  81 ++++
 drivers/net/ice/ice_dcf_sched.c  | 604 +++++++++++++++++++++++++++
 drivers/net/ice/meson.build      |   3 +-
 15 files changed, 1740 insertions(+), 7 deletions(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

-- 
2.17.1



* [dpdk-dev] [PATCH v1 1/5] common/iavf: add support for ETS-based Tx QoS
  2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
@ 2021-06-01  1:40 ` Ting Xu
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 2/5] net/ice/base: support DCF query port ETS adminq Ting Xu
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-01  1:40 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch adds support for configuring ETS-based Tx QoS. New virtchnl
structures and opcodes are added in three parts to:
1. Configure VF TC bandwidth limits.
2. Allow a VF to query the current QoS configuration from the PF.
3. Set up the VF queue TC mapping.
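
For reference, the bandwidth-config message in part 1 carries a trailing
variable-length array, so a sender sizes it the same way the validator
below checks it. A minimal sketch (make_vf_bw_msg() is a hypothetical
helper; the Kbps values are placeholders, Kbps being the unit used by
the DCF commit path later in this series):

  #include <stdint.h>
  #include <stdlib.h>

  static struct virtchnl_dcf_vf_bw_cfg_list *
  make_vf_bw_msg(uint16_t vf_id, uint16_t num_tc, uint16_t *len)
  {
  	struct virtchnl_dcf_vf_bw_cfg_list *list;
  	uint16_t i;

  	/* one cfg element is already embedded in the struct */
  	*len = sizeof(*list) + (num_tc - 1) * sizeof(list->cfg[0]);
  	list = calloc(1, *len);
  	if (!list)
  		return NULL;

  	list->vf_id = vf_id;
  	list->num_elem = num_tc;
  	for (i = 0; i < num_tc; i++) {
  		list->cfg[i].tc_id = i;
  		list->cfg[i].type = VIRTCHNL_BW_SHAPER;
  		list->cfg[i].shaper.committed = 100000; /* Kbps */
  		list->cfg[i].shaper.peak = 200000;      /* Kbps */
  	}
  	return list; /* send with VIRTCHNL_OP_DCF_CONFIG_VF_TC */
  }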

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/common/iavf/iavf_type.h |   2 +
 drivers/common/iavf/virtchnl.h  | 117 ++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+)

diff --git a/drivers/common/iavf/iavf_type.h b/drivers/common/iavf/iavf_type.h
index f3815d523b..73dfb47e70 100644
--- a/drivers/common/iavf/iavf_type.h
+++ b/drivers/common/iavf/iavf_type.h
@@ -141,6 +141,8 @@ enum iavf_debug_mask {
 #define IAVF_PHY_LED_MODE_MASK			0xFFFF
 #define IAVF_PHY_LED_MODE_ORIG			0x80000000
 
+#define IAVF_MAX_TRAFFIC_CLASS	8
+
 /* Memory types */
 enum iavf_memset_type {
 	IAVF_NONDMA_MEM = 0,
diff --git a/drivers/common/iavf/virtchnl.h b/drivers/common/iavf/virtchnl.h
index 3a60faff93..a00cd76118 100644
--- a/drivers/common/iavf/virtchnl.h
+++ b/drivers/common/iavf/virtchnl.h
@@ -130,6 +130,7 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
 	VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
 	/* opcodes 34, 35, 36, and 37 are reserved */
+	VIRTCHNL_OP_DCF_CONFIG_VF_TC = 37,
 	VIRTCHNL_OP_DCF_VLAN_OFFLOAD = 38,
 	VIRTCHNL_OP_DCF_CMD_DESC = 39,
 	VIRTCHNL_OP_DCF_CMD_BUFF = 40,
@@ -152,6 +153,8 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57,
 	VIRTCHNL_OP_ENABLE_VLAN_FILTERING_V2 = 58,
 	VIRTCHNL_OP_DISABLE_VLAN_FILTERING_V2 = 59,
+	VIRTCHNL_OP_GET_QOS_CAPS = 66,
+	VIRTCHNL_OP_CONFIG_TC_MAP = 67,
 	VIRTCHNL_OP_ENABLE_QUEUES_V2 = 107,
 	VIRTCHNL_OP_DISABLE_QUEUES_V2 = 108,
 	VIRTCHNL_OP_MAP_QUEUE_VECTOR = 111,
@@ -398,6 +401,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC	BIT(26)
 #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF		BIT(27)
 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF		BIT(28)
+#define VIRTCHNL_VF_OFFLOAD_TC		BIT(29)
 #define VIRTCHNL_VF_CAP_DCF			BIT(30)
 	/* BIT(31) is reserved */
 
@@ -1786,6 +1790,91 @@ struct virtchnl_fdir_query {
 
 VIRTCHNL_CHECK_STRUCT_LEN(48, virtchnl_fdir_query);
 
+/* VIRTCHNL_OP_DCF_CONFIG_VF_TC
+ * VF sends this message to set the configuration of each TC for a
+ * specific VF id.
+ */
+enum virtchnl_bw_limit_type {
+	VIRTCHNL_BW_SHAPER = 0,
+};
+
+struct virtchnl_shaper_bw {
+	u32 committed;
+	u32 peak;
+};
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw);
+
+struct virtchnl_dcf_vf_bw_cfg {
+	u8 tc_id;
+	u8 pad[3];
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_dcf_vf_bw_cfg);
+
+struct virtchnl_dcf_vf_bw_cfg_list {
+	u16 vf_id;
+	u16 num_elem;
+	struct virtchnl_dcf_vf_bw_cfg cfg[1];
+};
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_dcf_vf_bw_cfg_list);
+
+/* VIRTCHNL_OP_GET_QOS_CAPS
+ * VF sends this message to get its QoS capabilities, such as
+ * TC number, arbiter and bandwidth.
+ */
+struct virtchnl_qos_cap_elem {
+	u8 tc_id;
+	u8 prio_of_tc;
+#define VIRTCHNL_ABITER_STRICT    0
+#define VIRTCHNL_ABITER_ETS       2
+	u8 arbiter;
+	u8 weight;
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem);
+
+struct virtchnl_qos_cap_list {
+	u16 vsi_id;
+	u16 num_elem;
+	struct virtchnl_qos_cap_elem cap[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_qos_cap_list);
+
+/* VIRTCHNL_OP_CONFIG_TC_MAP
+ * VF sends a virtchnl_queue_tc_mapping message to set the queue-to-TC
+ * mapping for all the Tx and Rx queues of a specified VSI, and gets in
+ * response the bitmap of valid user priorities associated with the
+ * queues.
+ */
+struct virtchnl_queue_tc_mapping {
+	u16 vsi_id;
+	u16 num_tc;
+	u16 num_queue_pairs;
+	u8 pad[2];
+	union {
+		struct {
+			u16 start_queue_id;
+			u16 queue_count;
+		} req;
+		struct {
+#define VIRTCHNL_USER_PRIO_TYPE_UP	0
+#define VIRTCHNL_USER_PRIO_TYPE_DSCP	1
+			u16 prio_type;
+			u16 valid_prio_bitmap;
+		} resp;
+	} tc[1];
+};
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_tc_mapping);
+
 /* TX and RX queue types are valid in legacy as well as split queue models.
  * With Split Queue model, 2 additional types are introduced - TX_COMPLETION
  * and RX_BUFFER. In split queue model, RX corresponds to the queue where HW
@@ -2117,6 +2206,19 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DCF_GET_VSI_MAP:
 	case VIRTCHNL_OP_DCF_GET_PKG_INFO:
 		break;
+	case VIRTCHNL_OP_DCF_CONFIG_VF_TC:
+		valid_len = sizeof(struct virtchnl_dcf_vf_bw_cfg_list);
+		if (msglen >= valid_len) {
+			struct virtchnl_dcf_vf_bw_cfg_list *cfg_list =
+				(struct virtchnl_dcf_vf_bw_cfg_list *)msg;
+			if (cfg_list->num_elem == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (cfg_list->num_elem - 1) *
+					 sizeof(struct virtchnl_dcf_vf_bw_cfg);
+		}
+		break;
 	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		break;
 	case VIRTCHNL_OP_ADD_RSS_CFG:
@@ -2132,6 +2234,21 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_QUERY_FDIR_FILTER:
 		valid_len = sizeof(struct virtchnl_fdir_query);
 		break;
+	case VIRTCHNL_OP_GET_QOS_CAPS:
+		break;
+	case VIRTCHNL_OP_CONFIG_TC_MAP:
+		valid_len = sizeof(struct virtchnl_queue_tc_mapping);
+		if (msglen >= valid_len) {
+			struct virtchnl_queue_tc_mapping *q_tc =
+				(struct virtchnl_queue_tc_mapping *)msg;
+			if (q_tc->num_tc == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (q_tc->num_tc - 1) *
+					 sizeof(q_tc->tc[0]);
+		}
+		break;
 	case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
 		break;
 	case VIRTCHNL_OP_ADD_VLAN_V2:
-- 
2.17.1



* [dpdk-dev] [PATCH v1 2/5] net/ice/base: support DCF query port ETS adminq
  2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 1/5] common/iavf: add support for ETS-based Tx QoS Ting Xu
@ 2021-06-01  1:40 ` Ting Xu
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 3/5] net/ice: support DCF link status event handling Ting Xu
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-01  1:40 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

In the admin queue command that queries port ETS, the root node TEID is
needed. However, for DCF the root node is not initialized, which causes
an error when the variable is dereferenced. This patch checks whether
the root node is available before using it.

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/base/ice_dcb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ice/base/ice_dcb.c b/drivers/net/ice/base/ice_dcb.c
index 0aaa5ae8c1..08c950cd9a 100644
--- a/drivers/net/ice/base/ice_dcb.c
+++ b/drivers/net/ice/base/ice_dcb.c
@@ -1483,7 +1483,8 @@ ice_aq_query_port_ets(struct ice_port_info *pi,
 		return ICE_ERR_PARAM;
 	cmd = &desc.params.port_ets;
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
-	cmd->port_teid = pi->root->info.node_teid;
+	if (pi->root)
+		cmd->port_teid = pi->root->info.node_teid;
 
 	status = ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
 	return status;
-- 
2.17.1



* [dpdk-dev] [PATCH v1 3/5] net/ice: support DCF link status event handling
  2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 1/5] common/iavf: add support for ETS-based Tx QoS Ting Xu
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 2/5] net/ice/base: support DCF query port ETS adminq Ting Xu
@ 2021-06-01  1:40 ` Ting Xu
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 4/5] net/ice: support QoS config VF bandwidth in DCF Ting Xu
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-01  1:40 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

When the link status changes, DCF receives a virtchnl PF event message.
Add support to handle this event, update the link status and refresh
the link info.
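
Applications consume the forwarded event by registering an LSC
callback; a minimal sketch (lsc_event_cb() is a hypothetical handler,
port setup and error handling are omitted):

  #include <stdio.h>
  #include <rte_ethdev.h>

  static int
  lsc_event_cb(uint16_t port_id, enum rte_eth_event_type type,
  	     void *cb_arg, void *ret_param)
  {
  	struct rte_eth_link link;

  	RTE_SET_USED(type);
  	RTE_SET_USED(cb_arg);
  	RTE_SET_USED(ret_param);

  	/* reads the status cached by ice_dcf_link_update() */
  	if (rte_eth_link_get_nowait(port_id, &link) == 0)
  		printf("port %u link %s, speed %u Mbps\n", port_id,
  		       link.link_status ? "up" : "down", link.link_speed);
  	return 0;
  }

  /* during init: */
  rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
  				lsc_event_cb, NULL);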

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.h        |  6 ++++
 drivers/net/ice/ice_dcf_ethdev.c | 54 ++++++++++++++++++++++++++++++--
 drivers/net/ice/ice_dcf_parent.c | 51 ++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 0cb90b5e9f..587093b909 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -60,6 +60,10 @@ struct ice_dcf_hw {
 	uint16_t nb_msix;
 	uint16_t rxq_map[16];
 	struct virtchnl_eth_stats eth_stats_offset;
+
+	/* Link status */
+	bool link_up;
+	uint32_t link_speed;
 };
 
 int ice_dcf_execute_virtchnl_cmd(struct ice_dcf_hw *hw,
@@ -77,5 +81,7 @@ int ice_dcf_disable_queues(struct ice_dcf_hw *hw);
 int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 			struct virtchnl_eth_stats *pstats);
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
+int ice_dcf_link_update(struct rte_eth_dev *dev,
+		    __rte_unused int wait_to_complete);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index b937cbbb03..332ce340cf 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -880,11 +880,59 @@ ice_dcf_dev_close(struct rte_eth_dev *dev)
 	return 0;
 }
 
-static int
-ice_dcf_link_update(__rte_unused struct rte_eth_dev *dev,
+int
+ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete)
 {
-	return 0;
+	struct ice_dcf_adapter *ad = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &ad->real_hw;
+	struct rte_eth_link new_link;
+
+	memset(&new_link, 0, sizeof(new_link));
+
+	/* Only read the link status info stored in the VF; it is updated
+	 * when a LINK_CHANGE event is received from the PF via virtchnl.
+	 */
+	switch (hw->link_speed) {
+	case 10:
+		new_link.link_speed = ETH_SPEED_NUM_10M;
+		break;
+	case 100:
+		new_link.link_speed = ETH_SPEED_NUM_100M;
+		break;
+	case 1000:
+		new_link.link_speed = ETH_SPEED_NUM_1G;
+		break;
+	case 10000:
+		new_link.link_speed = ETH_SPEED_NUM_10G;
+		break;
+	case 20000:
+		new_link.link_speed = ETH_SPEED_NUM_20G;
+		break;
+	case 25000:
+		new_link.link_speed = ETH_SPEED_NUM_25G;
+		break;
+	case 40000:
+		new_link.link_speed = ETH_SPEED_NUM_40G;
+		break;
+	case 50000:
+		new_link.link_speed = ETH_SPEED_NUM_50G;
+		break;
+	case 100000:
+		new_link.link_speed = ETH_SPEED_NUM_100G;
+		break;
+	default:
+		new_link.link_speed = ETH_SPEED_NUM_NONE;
+		break;
+	}
+
+	new_link.link_duplex = ETH_LINK_FULL_DUPLEX;
+	new_link.link_status = hw->link_up ? ETH_LINK_UP :
+					     ETH_LINK_DOWN;
+	new_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+				ETH_LINK_SPEED_FIXED);
+
+	return rte_eth_linkstatus_set(dev, &new_link);
 }
 
 /* Add UDP tunneling port */
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 1d7aa8bc87..0c0706316d 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -178,6 +178,44 @@ start_vsi_reset_thread(struct ice_dcf_hw *dcf_hw, bool vfr, uint16_t vf_id)
 	}
 }
 
+static uint32_t
+ice_dcf_convert_link_speed(enum virtchnl_link_speed virt_link_speed)
+{
+	uint32_t speed;
+
+	switch (virt_link_speed) {
+	case VIRTCHNL_LINK_SPEED_100MB:
+		speed = 100;
+		break;
+	case VIRTCHNL_LINK_SPEED_1GB:
+		speed = 1000;
+		break;
+	case VIRTCHNL_LINK_SPEED_10GB:
+		speed = 10000;
+		break;
+	case VIRTCHNL_LINK_SPEED_40GB:
+		speed = 40000;
+		break;
+	case VIRTCHNL_LINK_SPEED_20GB:
+		speed = 20000;
+		break;
+	case VIRTCHNL_LINK_SPEED_25GB:
+		speed = 25000;
+		break;
+	case VIRTCHNL_LINK_SPEED_2_5GB:
+		speed = 2500;
+		break;
+	case VIRTCHNL_LINK_SPEED_5GB:
+		speed = 5000;
+		break;
+	default:
+		speed = 0;
+		break;
+	}
+
+	return speed;
+}
+
 void
 ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 			    uint8_t *msg, uint16_t msglen)
@@ -196,6 +234,19 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 		break;
 	case VIRTCHNL_EVENT_LINK_CHANGE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event");
+		dcf_hw->link_up = pf_msg->event_data.link_event.link_status;
+		if (dcf_hw->vf_res->vf_cap_flags &
+			VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+			dcf_hw->link_speed =
+				pf_msg->event_data.link_event_adv.link_speed;
+		} else {
+			enum virtchnl_link_speed speed;
+			speed = pf_msg->event_data.link_event.link_speed;
+			dcf_hw->link_speed = ice_dcf_convert_link_speed(speed);
+		}
+		ice_dcf_link_update(dcf_hw->eth_dev, 0);
+		rte_eth_dev_callback_process(dcf_hw->eth_dev,
+			RTE_ETH_EVENT_INTR_LSC, NULL);
 		break;
 	case VIRTCHNL_EVENT_PF_DRIVER_CLOSE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event");
-- 
2.17.1



* [dpdk-dev] [PATCH v1 4/5] net/ice: support QoS config VF bandwidth in DCF
  2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                   ` (2 preceding siblings ...)
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 3/5] net/ice: support DCF link status event handling Ting Xu
@ 2021-06-01  1:40 ` Ting Xu
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 5/5] net/iavf: query QoS cap and set queue TC mapping Ting Xu
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-01  1:40 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch supports the ETS-based QoS configuration. It enables the DCF
to configure bandwidth limits for each VF VSI across different TCs. A
hierarchical scheduler tree is built with port, TC and VSI nodes.
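
Three unit conventions meet in the commit path below: rte_tm shaper
rates are in bytes per second, the virtchnl shaper values are in Kbps,
and hw->link_speed is in Mbps. A small sketch of the conversions
(helper names are illustrative):

  #include <stdint.h>

  static inline uint32_t
  tm_rate_to_kbps(uint64_t bytes_per_sec)
  {
  	/* e.g. 1250000 B/s (10 Mbps) -> 10000 Kbps */
  	return (uint32_t)(bytes_per_sec / 1000 * 8);
  }

  static inline uint32_t
  port_bw_kbps(uint32_t link_speed_mbps)
  {
  	/* bound for the per-VF CIR sum check */
  	return link_speed_mbps * 1000;
  }

The VSI node ids also carry meaning: the commit loop expects
node_id == tc * num_vfs + vf_id when matching VSI nodes to VFs.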

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.c        |   6 +-
 drivers/net/ice/ice_dcf.h        |  47 +++
 drivers/net/ice/ice_dcf_ethdev.c |  13 +
 drivers/net/ice/ice_dcf_ethdev.h |   3 +
 drivers/net/ice/ice_dcf_parent.c |  30 ++
 drivers/net/ice/ice_dcf_sched.c  | 604 +++++++++++++++++++++++++++++++
 drivers/net/ice/meson.build      |   3 +-
 7 files changed, 704 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index d72a6f357e..f8b4e07d86 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -235,7 +235,8 @@ ice_dcf_get_vf_resource(struct ice_dcf_hw *hw)
 	caps = VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | VIRTCHNL_VF_OFFLOAD_RX_POLLING |
 	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED | VIRTCHNL_VF_CAP_DCF |
 	       VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
+	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC |
+	       VIRTCHNL_VF_OFFLOAD_TC;
 
 	err = ice_dcf_send_cmd_req_no_irq(hw, VIRTCHNL_OP_GET_VF_RESOURCES,
 					  (uint8_t *)&caps, sizeof(caps));
@@ -668,6 +669,9 @@ ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 		}
 	}
 
+	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_TC)
+		ice_dcf_tm_conf_init(eth_dev);
+
 	hw->eth_dev = eth_dev;
 	rte_intr_callback_register(&pci_dev->intr_handle,
 				   ice_dcf_dev_interrupt_handler, hw);
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 587093b909..e74e5d7e81 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -6,6 +6,7 @@
 #define _ICE_DCF_H_
 
 #include <ethdev_driver.h>
+#include <rte_tm_driver.h>
 
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
@@ -30,6 +31,49 @@ struct dcf_virtchnl_cmd {
 	volatile int pending;
 };
 
+struct ice_dcf_tm_shaper_profile {
+	TAILQ_ENTRY(ice_dcf_tm_shaper_profile) node;
+	uint32_t shaper_profile_id;
+	uint32_t reference_count;
+	struct rte_tm_shaper_params profile;
+};
+
+TAILQ_HEAD(ice_dcf_shaper_profile_list, ice_dcf_tm_shaper_profile);
+
+/* Struct to store Traffic Manager node configuration. */
+struct ice_dcf_tm_node {
+	TAILQ_ENTRY(ice_dcf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct ice_dcf_tm_node *parent;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(ice_dcf_tm_node_list, ice_dcf_tm_node);
+
+/* node type of Traffic Manager */
+enum ice_dcf_tm_node_type {
+	ICE_DCF_TM_NODE_TYPE_PORT,
+	ICE_DCF_TM_NODE_TYPE_TC,
+	ICE_DCF_TM_NODE_TYPE_VSI,
+	ICE_DCF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct ice_dcf_tm_conf {
+	struct ice_dcf_shaper_profile_list shaper_profile_list;
+	struct ice_dcf_tm_node *root; /* root node - port */
+	struct ice_dcf_tm_node_list tc_list; /* node list for all the TCs */
+	struct ice_dcf_tm_node_list vsi_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_vsi_node;
+	bool committed;
+};
+
 struct ice_dcf_hw {
 	struct iavf_hw avf;
 
@@ -45,6 +89,8 @@ struct ice_dcf_hw {
 	uint16_t *vf_vsi_map;
 	uint16_t pf_vsi_id;
 
+	struct ice_dcf_tm_conf tm_conf;
+	struct ice_aqc_port_ets_elem *ets_config;
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
@@ -83,5 +129,6 @@ int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
 int ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete);
+void ice_dcf_tm_conf_init(struct rte_eth_dev *dev);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 332ce340cf..91c4486260 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -993,6 +993,18 @@ ice_dcf_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
 	return ret;
 }
 
+static int
+ice_dcf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+		void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &ice_dcf_tm_ops;
+
+	return 0;
+}
+
 static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.dev_start               = ice_dcf_dev_start,
 	.dev_stop                = ice_dcf_dev_stop,
@@ -1017,6 +1029,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.flow_ops_get            = ice_dcf_dev_flow_ops_get,
 	.udp_tunnel_port_add	 = ice_dcf_dev_udp_tunnel_port_add,
 	.udp_tunnel_port_del	 = ice_dcf_dev_udp_tunnel_port_del,
+	.tm_ops_get              = ice_dcf_tm_ops_get,
 };
 
 static int
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index e7c9d7fe41..8510e37119 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -7,6 +7,8 @@
 
 #include "base/ice_common.h"
 #include "base/ice_adminq_cmd.h"
+#include "base/ice_dcb.h"
+#include "base/ice_sched.h"
 
 #include "ice_ethdev.h"
 #include "ice_dcf.h"
@@ -52,6 +54,7 @@ struct ice_dcf_vf_repr {
 	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN */
 };
 
+extern const struct rte_tm_ops ice_dcf_tm_ops;
 void ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 				 uint8_t *msg, uint16_t msglen);
 int ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 0c0706316d..2403d9c259 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -264,6 +264,29 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 	}
 }
 
+static int
+ice_dcf_query_port_ets(struct ice_hw *parent_hw, struct ice_dcf_hw *real_hw)
+{
+	int ret;
+
+	real_hw->ets_config = (struct ice_aqc_port_ets_elem *)
+			ice_malloc(real_hw, sizeof(*real_hw->ets_config));
+	if (!real_hw->ets_config)
+		return ICE_ERR_NO_MEMORY;
+
+	ret = ice_aq_query_port_ets(parent_hw->port_info,
+			real_hw->ets_config, sizeof(*real_hw->ets_config),
+			NULL);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "DCF Query Port ETS failed");
+		rte_free(real_hw->ets_config);
+		real_hw->ets_config = NULL;
+		return ret;
+	}
+
+	return ICE_SUCCESS;
+}
+
 static int
 ice_dcf_init_parent_hw(struct ice_hw *hw)
 {
@@ -487,6 +510,13 @@ ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev)
 		return err;
 	}
 
+	err = ice_dcf_query_port_ets(parent_hw, hw);
+	if (err) {
+		PMD_INIT_LOG(ERR, "failed to query port ets with error %d",
+			     err);
+		goto uninit_hw;
+	}
+
 	err = ice_dcf_load_pkg(parent_hw);
 	if (err) {
 		PMD_INIT_LOG(ERR, "failed to load package with error %d",
diff --git a/drivers/net/ice/ice_dcf_sched.c b/drivers/net/ice/ice_dcf_sched.c
new file mode 100644
index 0000000000..06d835ca24
--- /dev/null
+++ b/drivers/net/ice/ice_dcf_sched.c
@@ -0,0 +1,604 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "base/ice_sched.h"
+#include "ice_dcf_ethdev.h"
+
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+				   uint32_t shaper_profile_id,
+				   struct rte_tm_error *error);
+
+const struct rte_tm_ops ice_dcf_tm_ops = {
+	.shaper_profile_add = ice_dcf_shaper_profile_add,
+	.shaper_profile_delete = ice_dcf_shaper_profile_del,
+	.hierarchy_commit = ice_dcf_hierarchy_commit,
+	.node_add = ice_dcf_node_add,
+	.node_delete = ice_dcf_node_delete,
+};
+
+void
+ice_dcf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+
+	/* initialize shaper profile list */
+	TAILQ_INIT(&hw->tm_conf.shaper_profile_list);
+
+	/* initialize node configuration */
+	hw->tm_conf.root = NULL;
+	TAILQ_INIT(&hw->tm_conf.tc_list);
+	TAILQ_INIT(&hw->tm_conf.vsi_list);
+	hw->tm_conf.nb_tc_node = 0;
+	hw->tm_conf.nb_vsi_node = 0;
+	hw->tm_conf.committed = false;
+}
+
+static inline struct ice_dcf_tm_node *
+dcf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum ice_dcf_tm_node_type *node_type)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct ice_dcf_tm_node_list *tc_list = &hw->tm_conf.tc_list;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (hw->tm_conf.root && hw->tm_conf.root->id == node_id) {
+		*node_type = ICE_DCF_TM_NODE_TYPE_PORT;
+		return hw->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, vsi_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_VSI;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static inline struct ice_dcf_tm_shaper_profile *
+dcf_shaper_profile_search(struct rte_eth_dev *dev,
+			   uint32_t shaper_profile_id)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_shaper_profile_list *shaper_profile_list =
+		&hw->tm_conf.shaper_profile_list;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	TAILQ_FOREACH(shaper_profile, shaper_profile_list, node) {
+		if (shaper_profile_id == shaper_profile->shaper_profile_id)
+			return shaper_profile;
+	}
+
+	return NULL;
+}
+
+static int
+dcf_node_param_check(struct ice_dcf_hw *hw, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* shared shaper is not supported */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= 8 * hw->num_vfs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type parent_node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_tm_shaper_profile *shaper_profile = NULL;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *parent_node;
+	struct ice_dcf_tm_node *tm_node;
+	uint16_t tc_nb = 1;
+	int i, ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = dcf_node_param_check(hw, node_id, priority, weight,
+				   params, error);
+	if (ret)
+		return ret;
+
+	for (i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		if (hw->ets_config->tc_valid_bits & (1 << i))
+			tc_nb++;
+	}
+
+	/* check if the node already exists */
+	if (dcf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* check the shaper profile id */
+	if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE) {
+		shaper_profile = dcf_shaper_profile_search(dev,
+			params->shaper_profile_id);
+		if (!shaper_profile) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+			error->message = "shaper profile not exist";
+			return -EINVAL;
+		}
+	}
+
+	/* add the root node if it has no parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != ICE_DCF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (hw->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("ice_dcf_tm_node",
+				      sizeof(struct ice_dcf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		hw->tm_conf.root = tm_node;
+
+		return 0;
+	}
+
+	/* TC or vsi node */
+	/* check the parent node */
+	parent_node = dcf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != ICE_DCF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != ICE_DCF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not port or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the TC node number */
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (hw->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the vsi node number */
+		if (parent_node->reference_count >= hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many VSI for one TC";
+			return -EINVAL;
+		}
+		/* check the vsi node id */
+		if (node_id > tc_nb * hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large VSI id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or vsi node */
+	tm_node = rte_zmalloc("ice_dcf_tm_node",
+			      sizeof(struct ice_dcf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->priority = priority;
+	tm_node->weight = weight;
+	tm_node->shaper_profile = shaper_profile;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = hw->tm_conf.nb_tc_node;
+		hw->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.vsi_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		hw->tm_conf.nb_vsi_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	/* increase the reference counter of the shaper profile */
+	if (shaper_profile)
+		shaper_profile->reference_count++;
+
+	return 0;
+}
+
+static int
+ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = dcf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		if (tm_node->shaper_profile)
+			tm_node->shaper_profile->reference_count--;
+		rte_free(tm_node);
+		hw->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or VSI node */
+	if (tm_node->shaper_profile)
+		tm_node->shaper_profile->reference_count--;
+	tm_node->parent->reference_count--;
+	if (node_type == ICE_DCF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&hw->tm_conf.tc_list, tm_node, node);
+		hw->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&hw->tm_conf.vsi_list, tm_node, node);
+		hw->tm_conf.nb_vsi_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+dcf_shaper_profile_param_check(struct rte_tm_shaper_params *profile,
+				struct rte_tm_error *error)
+{
+	/* min bucket size not supported */
+	if (profile->committed.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE;
+		error->message = "committed bucket size not supported";
+		return -EINVAL;
+	}
+	/* max bucket size not supported */
+	if (profile->peak.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE;
+		error->message = "peak bucket size not supported";
+		return -EINVAL;
+	}
+	/* length adjustment not supported */
+	if (profile->pkt_length_adjust) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN;
+		error->message = "packet length adjustment not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	int ret;
+
+	if (!profile || !error)
+		return -EINVAL;
+
+	ret = dcf_shaper_profile_param_check(profile, error);
+	if (ret)
+		return ret;
+
+	shaper_profile = dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID exist";
+		return -EINVAL;
+	}
+
+	shaper_profile = rte_zmalloc("ice_dcf_tm_shaper_profile",
+				     sizeof(struct ice_dcf_tm_shaper_profile),
+				     0);
+	if (!shaper_profile)
+		return -ENOMEM;
+	shaper_profile->shaper_profile_id = shaper_profile_id;
+	rte_memcpy(&shaper_profile->profile, profile,
+			 sizeof(struct rte_tm_shaper_params));
+	TAILQ_INSERT_TAIL(&hw->tm_conf.shaper_profile_list,
+			  shaper_profile, node);
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	if (!error)
+		return -EINVAL;
+
+	shaper_profile = dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (!shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID not exist";
+		return -EINVAL;
+	}
+
+	/* don't delete a profile if it's used by one or several nodes */
+	if (shaper_profile->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE;
+		error->message = "profile in use";
+		return -EINVAL;
+	}
+
+	TAILQ_REMOVE(&hw->tm_conf.shaper_profile_list, shaper_profile, node);
+	rte_free(shaper_profile);
+
+	return 0;
+}
+
+static int
+ice_dcf_set_vf_bw(struct ice_dcf_hw *hw,
+			struct virtchnl_dcf_vf_bw_cfg_list *vf_bw,
+			uint16_t len)
+{
+	struct dcf_virtchnl_cmd args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.v_op = VIRTCHNL_OP_DCF_CONFIG_VF_TC;
+	args.req_msg = (uint8_t *)vf_bw;
+	args.req_msglen  = len;
+	err = ice_dcf_execute_virtchnl_cmd(hw, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "fail to execute command %s",
+			    "VIRTCHNL_OP_DCF_CONFIG_VF_TC");
+	return err;
+}
+
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct virtchnl_dcf_vf_bw_cfg_list *vf_bw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct rte_tm_shaper_params *profile;
+	struct ice_dcf_tm_node *tm_node;
+	uint32_t port_bw, cir_total;
+	uint16_t size, vf_id;
+	int ret;
+	int num_elem = 0;
+
+	size = sizeof(*vf_bw) +
+		sizeof(vf_bw->cfg[0]) * (hw->tm_conf.nb_tc_node - 1);
+	vf_bw = rte_zmalloc("vf_bw", size, 0);
+	if (!vf_bw)
+		return ICE_ERR_NO_MEMORY;
+
+	/* port bandwidth (Kbps) */
+	port_bw = hw->link_speed * 1000;
+	cir_total = 0;
+
+	for (vf_id = 0; vf_id < hw->num_vfs; vf_id++) {
+		num_elem = 0;
+		vf_bw->vf_id = vf_id;
+		TAILQ_FOREACH(tm_node, vsi_list, node) {
+			/* scan the nodes belonging to one VSI */
+			if (tm_node->id - hw->num_vfs * tm_node->tc != vf_id)
+				continue;
+			vf_bw->cfg[num_elem].tc_id = tm_node->tc;
+			vf_bw->cfg[num_elem].type = VIRTCHNL_BW_SHAPER;
+			if (tm_node->shaper_profile) {
+				/* Convert from bytes per second to Kbps */
+				profile = &tm_node->shaper_profile->profile;
+				vf_bw->cfg[num_elem].shaper.peak =
+				profile->peak.rate / 1000 * BITS_PER_BYTE;
+				vf_bw->cfg[num_elem].shaper.committed =
+				profile->committed.rate / 1000 * BITS_PER_BYTE;
+			}
+			cir_total += vf_bw->cfg[num_elem].shaper.committed;
+			num_elem++;
+		}
+
+		/* check if total CIR is larger than port bandwidth */
+		if (cir_total > port_bw) {
+			PMD_DRV_LOG(ERR, "Total CIR of all VFs is larger than port bandwidth");
+			return ICE_ERR_PARAM;
+		}
+		vf_bw->num_elem = num_elem;
+		ret = ice_dcf_set_vf_bw(hw, vf_bw, size);
+		if (ret)
+			return ret;
+	}
+
+	hw->tm_conf.committed = true;
+	return ICE_SUCCESS;
+}
diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build
index 65750d3501..0b86d74a49 100644
--- a/drivers/net/ice/meson.build
+++ b/drivers/net/ice/meson.build
@@ -70,6 +70,7 @@ endif
 sources += files('ice_dcf.c',
          'ice_dcf_vf_representor.c',
          'ice_dcf_ethdev.c',
-         'ice_dcf_parent.c')
+         'ice_dcf_parent.c',
+	 'ice_dcf_sched.c')
 
 headers = files('rte_pmd_ice.h')
-- 
2.17.1



* [dpdk-dev] [PATCH v1 5/5] net/iavf: query QoS cap and set queue TC mapping
  2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                   ` (3 preceding siblings ...)
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 4/5] net/ice: support QoS config VF bandwidth in DCF Ting Xu
@ 2021-06-01  1:40 ` Ting Xu
  2021-06-17 10:17 ` [dpdk-dev] [PATCH v2 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-01  1:40 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch adds support for the VF to configure ETS-based Tx QoS,
including querying the current QoS configuration from the PF and
configuring the queue TC mapping. PF QoS is configured in advance, and
the queried info is provided to the user for later use. VF queues are
mapped to different TCs in the PF through virtchnl.
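
A minimal sketch of building the mapping message (make_q_tc_map() is a
hypothetical helper that spreads queues evenly; the sizing mirrors the
validator, which adds (num_tc - 1) * sizeof(tc[0])):

  #include <stdint.h>
  #include <stdlib.h>

  static struct virtchnl_queue_tc_mapping *
  make_q_tc_map(uint16_t vsi_id, uint16_t num_tc, uint16_t num_queues,
  	      uint16_t *len)
  {
  	struct virtchnl_queue_tc_mapping *map;
  	uint16_t i, per_tc = num_queues / num_tc;

  	*len = sizeof(*map) + (num_tc - 1) * sizeof(map->tc[0]);
  	map = calloc(1, *len);
  	if (!map)
  		return NULL;

  	map->vsi_id = vsi_id;
  	map->num_tc = num_tc;
  	map->num_queue_pairs = num_queues;
  	for (i = 0; i < num_tc; i++) {
  		map->tc[i].req.start_queue_id = i * per_tc;
  		map->tc[i].req.queue_count = per_tc;
  	}
  	/* send with VIRTCHNL_OP_CONFIG_TC_MAP; on success the PF fills
  	 * tc[i].resp with the valid user-priority bitmap
  	 */
  	return map;
  }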

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/iavf/iavf.h        |  45 +++
 drivers/net/iavf/iavf_ethdev.c |  31 ++
 drivers/net/iavf/iavf_tm.c     | 675 +++++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c  |  56 ++-
 drivers/net/iavf/meson.build   |   1 +
 5 files changed, 807 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 4f5811ae87..77ddf15f42 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -6,6 +6,8 @@
 #define _IAVF_ETHDEV_H_
 
 #include <rte_kvargs.h>
+#include <rte_tm_driver.h>
+
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
 #include <iavf_type.h>
@@ -82,6 +84,8 @@
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
 
+#define IAVF_BITS_PER_BYTE 8
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -129,6 +133,38 @@ enum iavf_aq_result {
 	IAVF_MSG_CMD,      /* Read async command result */
 };
 
+/* Struct to store Traffic Manager node configuration. */
+struct iavf_tm_node {
+	TAILQ_ENTRY(iavf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct iavf_tm_node *parent;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(iavf_tm_node_list, iavf_tm_node);
+
+/* node type of Traffic Manager */
+enum iavf_tm_node_type {
+	IAVF_TM_NODE_TYPE_PORT,
+	IAVF_TM_NODE_TYPE_TC,
+	IAVF_TM_NODE_TYPE_QUEUE,
+	IAVF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct iavf_tm_conf {
+	struct iavf_tm_node *root; /* root node - vf vsi */
+	struct iavf_tm_node_list tc_list; /* node list for all the TCs */
+	struct iavf_tm_node_list queue_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_queue_node;
+	bool committed;
+};
+
 /* Structure to store private data specific for VF instance. */
 struct iavf_info {
 	uint16_t num_queue_pairs;
@@ -175,6 +211,9 @@ struct iavf_info {
 	struct iavf_fdir_info fdir; /* flow director info */
 	/* indicate large VF support enabled or not */
 	bool lv_enabled;
+
+	struct virtchnl_qos_cap_list *qos_cap;
+	struct iavf_tm_conf tm_conf;
 };
 
 #define IAVF_MAX_PKT_TYPE 1024
@@ -344,4 +383,10 @@ int iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			uint32_t mc_addrs_num, bool add);
 int iavf_request_queues(struct iavf_adapter *adapter, uint16_t num);
 int iavf_get_max_rss_queue_region(struct iavf_adapter *adapter);
+int iavf_get_qos_cap(struct iavf_adapter *adapter);
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+			struct virtchnl_queue_tc_mapping *q_tc_mapping,
+			uint16_t size);
+void iavf_tm_conf_init(struct rte_eth_dev *dev);
+extern const struct rte_tm_ops iavf_tm_ops;
 #endif /* _IAVF_ETHDEV_H_ */
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index cb38fe81e1..e0a03a0bee 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -122,6 +122,7 @@ static int iavf_dev_flow_ops_get(struct rte_eth_dev *dev,
 static int iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
 			uint32_t mc_addrs_num);
+static int iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg);
 
 static const struct rte_pci_id pci_id_iavf_map[] = {
 	{ RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_ADAPTIVE_VF) },
@@ -200,8 +201,21 @@ static const struct eth_dev_ops iavf_eth_dev_ops = {
 	.flow_ops_get               = iavf_dev_flow_ops_get,
 	.tx_done_cleanup	    = iavf_dev_tx_done_cleanup,
 	.get_monitor_addr           = iavf_get_monitor_addr,
+	.tm_ops_get                 = iavf_tm_ops_get,
 };
 
+static int
+iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+			void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &iavf_tm_ops;
+
+	return 0;
+}
+
 static int
 iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
@@ -806,6 +820,11 @@ iavf_dev_start(struct rte_eth_dev *dev)
 				      dev->data->nb_tx_queues);
 	num_queue_pairs = vf->num_queue_pairs;
 
+	if (iavf_get_qos_cap(adapter)) {
+		PMD_INIT_LOG(ERR, "Failed to get qos capability");
+		return -1;
+	}
+
 	if (iavf_init_queues(dev) != 0) {
 		PMD_DRV_LOG(ERR, "failed to do Queue init");
 		return -1;
@@ -2090,6 +2109,15 @@ iavf_init_vf(struct rte_eth_dev *dev)
 		PMD_INIT_LOG(ERR, "unable to allocate vf_res memory");
 		goto err_api;
 	}
+
+	bufsz = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+	vf->qos_cap = rte_zmalloc("qos_cap", bufsz, 0);
+	if (!vf->qos_cap) {
+		PMD_INIT_LOG(ERR, "unable to allocate qos_cap memory");
+		goto err_api;
+	}
+
 	if (iavf_get_vf_resource(adapter) != 0) {
 		PMD_INIT_LOG(ERR, "iavf_get_vf_config failed");
 		goto err_alloc;
@@ -2131,6 +2159,7 @@ iavf_init_vf(struct rte_eth_dev *dev)
 	rte_free(vf->rss_key);
 	rte_free(vf->rss_lut);
 err_alloc:
+	rte_free(vf->qos_cap);
 	rte_free(vf->vf_res);
 	vf->vsi_res = NULL;
 err_api:
@@ -2299,6 +2328,8 @@ iavf_dev_init(struct rte_eth_dev *eth_dev)
 
 	iavf_default_rss_disable(adapter);
 
+	iavf_tm_conf_init(eth_dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
new file mode 100644
index 0000000000..b8e11cbe84
--- /dev/null
+++ b/drivers/net/iavf/iavf_tm.c
@@ -0,0 +1,675 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "iavf.h"
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error);
+static int iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error);
+static int iavf_node_capabilities_get(struct rte_eth_dev *dev,
+				      uint32_t node_id,
+				      struct rte_tm_node_capabilities *cap,
+				      struct rte_tm_error *error);
+static int iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error);
+
+const struct rte_tm_ops iavf_tm_ops = {
+	.node_add = iavf_tm_node_add,
+	.node_delete = iavf_tm_node_delete,
+	.capabilities_get = iavf_tm_capabilities_get,
+	.level_capabilities_get = iavf_level_capabilities_get,
+	.node_capabilities_get = iavf_node_capabilities_get,
+	.node_type_get = iavf_node_type_get,
+	.hierarchy_commit = iavf_hierarchy_commit,
+};
+
+void
+iavf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	/* initialize node configuration */
+	vf->tm_conf.root = NULL;
+	TAILQ_INIT(&vf->tm_conf.tc_list);
+	TAILQ_INIT(&vf->tm_conf.queue_list);
+	vf->tm_conf.nb_tc_node = 0;
+	vf->tm_conf.nb_queue_node = 0;
+	vf->tm_conf.committed = false;
+}
+
+
+static inline struct iavf_tm_node *
+iavf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum iavf_tm_node_type *node_type)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_node_list *tc_list = &vf->tm_conf.tc_list;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+
+	if (vf->tm_conf.root && vf->tm_conf.root->id == node_id) {
+		*node_type = IAVF_TM_NODE_TYPE_PORT;
+		return vf->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_QUEUE;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static int
+iavf_node_param_check(struct iavf_info *vf, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* shaper profile is not supported */
+	if (params->shaper_profile_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+		error->message = "shaper profile not supported";
+		return -EINVAL;
+	}
+
+	/* shared shaper is not supported */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= vf->num_queue_pairs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error)
+{
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!is_leaf || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type == IAVF_TM_NODE_TYPE_QUEUE)
+		*is_leaf = true;
+	else
+		*is_leaf = false;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	enum iavf_tm_node_type parent_node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+	struct iavf_tm_node *parent_node;
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+	int ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = iavf_node_param_check(vf, node_id, priority, weight,
+				    params, error);
+	if (ret)
+		return ret;
+
+	/* check if the node already exists */
+	if (iavf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* the root node has no parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != IAVF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (vf->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("iavf_tm_node",
+				      sizeof(struct iavf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		vf->tm_conf.root = tm_node;
+		return 0;
+	}
+
+	/* TC or queue node */
+	/* check the parent node */
+	parent_node = iavf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != IAVF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not root or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the node number */
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (vf->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the queue number */
+		if (parent_node->reference_count >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many queues";
+			return -EINVAL;
+		}
+		if (node_id >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large queue id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or queue node */
+	tm_node = rte_zmalloc("iavf_tm_node",
+			      sizeof(struct iavf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = vf->tm_conf.nb_tc_node;
+		vf->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.queue_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		vf->tm_conf.nb_queue_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == IAVF_TM_NODE_TYPE_PORT) {
+		rte_free(tm_node);
+		vf->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or queue node */
+	tm_node->parent->reference_count--;
+	if (node_type == IAVF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&vf->tm_conf.tc_list, tm_node, node);
+		vf->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&vf->tm_conf.queue_list, tm_node, node);
+		vf->tm_conf.nb_queue_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (tc_nb > vf->vf_res->num_queue_pairs)
+		return -EINVAL;
+
+	error->type = RTE_TM_ERROR_TYPE_NONE;
+
+	/* set all the parameters to 0 first. */
+	memset(cap, 0, sizeof(struct rte_tm_capabilities));
+
+	/**
+	 * support port + TCs + queues
+	 * here shows the max capability not the current configuration.
+	 */
+	cap->n_nodes_max = 1 + IAVF_MAX_TRAFFIC_CLASS
+		+ vf->num_queue_pairs;
+	cap->n_levels_max = 3; /* port, TC, queue */
+	cap->non_leaf_nodes_identical = 1;
+	cap->leaf_nodes_identical = 1;
+	cap->shaper_n_max = cap->n_nodes_max;
+	cap->shaper_private_n_max = cap->n_nodes_max;
+	cap->shaper_private_dual_rate_n_max = 0;
+	cap->shaper_private_rate_min = 0;
+	/* GBps */
+	cap->shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->shaper_private_packet_mode_supported = 0;
+	cap->shaper_private_byte_mode_supported = 1;
+	cap->shaper_shared_n_max = 0;
+	cap->shaper_shared_n_nodes_per_shaper_max = 0;
+	cap->shaper_shared_n_shapers_per_node_max = 0;
+	cap->shaper_shared_dual_rate_n_max = 0;
+	cap->shaper_shared_rate_min = 0;
+	cap->shaper_shared_rate_max = 0;
+	cap->shaper_shared_packet_mode_supported = 0;
+	cap->shaper_shared_byte_mode_supported = 0;
+	cap->sched_n_children_max = vf->num_queue_pairs;
+	cap->sched_sp_n_priorities_max = 1;
+	cap->sched_wfq_n_children_per_group_max = 0;
+	cap->sched_wfq_n_groups_max = 0;
+	cap->sched_wfq_weight_max = 1;
+	cap->sched_wfq_packet_mode_supported = 0;
+	cap->sched_wfq_byte_mode_supported = 0;
+	cap->cman_head_drop_supported = 0;
+	cap->dynamic_update_mask = 0;
+	cap->shaper_pkt_length_adjust_min = RTE_TM_ETH_FRAMING_OVERHEAD;
+	cap->shaper_pkt_length_adjust_max = RTE_TM_ETH_FRAMING_OVERHEAD_FCS;
+	cap->cman_wred_context_n_max = 0;
+	cap->cman_wred_context_private_n_max = 0;
+	cap->cman_wred_context_shared_n_max = 0;
+	cap->cman_wred_context_shared_n_nodes_per_context_max = 0;
+	cap->cman_wred_context_shared_n_contexts_per_node_max = 0;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (level_id >= IAVF_TM_NODE_TYPE_MAX) {
+		error->type = RTE_TM_ERROR_TYPE_LEVEL_ID;
+		error->message = "too deep level";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (level_id == IAVF_TM_NODE_TYPE_PORT) {
+		cap->n_nodes_max = 1;
+		cap->n_nodes_nonleaf_max = 1;
+		cap->n_nodes_leaf_max = 0;
+	} else if (level_id == IAVF_TM_NODE_TYPE_TC) {
+		/* TC */
+		cap->n_nodes_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_nonleaf_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_leaf_max = 0;
+	} else {
+		/* queue */
+		cap->n_nodes_max = vf->num_queue_pairs;
+		cap->n_nodes_nonleaf_max = 0;
+		cap->n_nodes_leaf_max = vf->num_queue_pairs;
+	}
+
+	cap->non_leaf_nodes_identical = true;
+	cap->leaf_nodes_identical = true;
+
+	if (level_id != IAVF_TM_NODE_TYPE_QUEUE) {
+		cap->nonleaf.shaper_private_supported = false;
+		cap->nonleaf.shaper_private_dual_rate_supported = false;
+		cap->nonleaf.shaper_private_rate_min = 0;
+		/* GBps */
+		cap->nonleaf.shaper_private_rate_max =
+			vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+		cap->nonleaf.shaper_private_packet_mode_supported = 0;
+		cap->nonleaf.shaper_private_byte_mode_supported = 1;
+		cap->nonleaf.shaper_shared_n_max = 0;
+		cap->nonleaf.shaper_shared_packet_mode_supported = 0;
+		cap->nonleaf.shaper_shared_byte_mode_supported = 0;
+		if (level_id == IAVF_TM_NODE_TYPE_PORT)
+			cap->nonleaf.sched_n_children_max =
+				IAVF_MAX_TRAFFIC_CLASS;
+		else
+			cap->nonleaf.sched_n_children_max =
+				vf->num_queue_pairs;
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
+		cap->nonleaf.sched_wfq_n_groups_max = 0;
+		cap->nonleaf.sched_wfq_weight_max = 1;
+		cap->nonleaf.sched_wfq_packet_mode_supported = 0;
+		cap->nonleaf.sched_wfq_byte_mode_supported = 0;
+		cap->nonleaf.stats_mask = 0;
+
+		return 0;
+	}
+
+	/* queue node */
+	cap->leaf.shaper_private_supported = false;
+	cap->leaf.shaper_private_dual_rate_supported = false;
+	cap->leaf.shaper_private_rate_min = 0;
+	/* GBps */
+	cap->leaf.shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->leaf.shaper_private_packet_mode_supported = 0;
+	cap->leaf.shaper_private_byte_mode_supported = 1;
+	cap->leaf.shaper_shared_n_max = 0;
+	cap->leaf.shaper_shared_packet_mode_supported = 0;
+	cap->leaf.shaper_shared_byte_mode_supported = 0;
+	cap->leaf.cman_head_drop_supported = false;
+	cap->leaf.cman_wred_context_private_supported = true;
+	cap->leaf.cman_wred_context_shared_n_max = 0;
+	cap->leaf.stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_node_capabilities_get(struct rte_eth_dev *dev,
+			   uint32_t node_id,
+			   struct rte_tm_node_capabilities *cap,
+			   struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type;
+	struct virtchnl_qos_cap_elem tc_cap;
+	struct iavf_tm_node *tm_node;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "not support capability get";
+		return -EINVAL;
+	}
+
+	tc_cap = vf->qos_cap->cap[tm_node->tc];
+	if (tc_cap.tc_id != tm_node->tc) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "tc not match";
+		return -EINVAL;
+	}
+
+	cap->shaper_private_supported = true;
+	cap->shaper_private_dual_rate_supported = false;
+	cap->shaper_private_rate_min = tc_cap.shaper.committed;
+	cap->shaper_private_rate_max = tc_cap.shaper.peak;
+	cap->shaper_shared_n_max = 0;
+	cap->nonleaf.sched_n_children_max = vf->num_queue_pairs;
+
+	if (tc_cap.arbiter == VIRTCHNL_ABITER_ETS) {
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max =
+			vf->num_queue_pairs;
+		cap->nonleaf.sched_wfq_n_groups_max = 1;
+		cap->nonleaf.sched_wfq_weight_max = tc_cap.weight;
+	}
+
+	if (tc_cap.arbiter == VIRTCHNL_ABITER_STRICT) {
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
+		cap->nonleaf.sched_wfq_n_groups_max = 0;
+		cap->nonleaf.sched_wfq_weight_max = 1;
+	}
+
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct virtchnl_queue_tc_mapping *q_tc_mapping;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+	uint16_t size;
+	int index = 0, node_committed = 0;
+	int ret, i;
+
+	size = sizeof(*q_tc_mapping) + sizeof(q_tc_mapping->tc[0]) *
+		(vf->qos_cap->num_elem - 1);
+	q_tc_mapping = rte_zmalloc("q_tc", size, 0);
+	q_tc_mapping->vsi_id = vf->vsi.vsi_id;
+	q_tc_mapping->num_tc = vf->qos_cap->num_elem;
+	q_tc_mapping->num_queue_pairs = vf->num_queue_pairs;
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		q_tc_mapping->tc[tm_node->tc].req.queue_count++;
+		node_committed++;
+	}
+
+	for (i = 0; i < q_tc_mapping->num_tc; i++) {
+		q_tc_mapping->tc[i].req.start_queue_id = index;
+		index += q_tc_mapping->tc[i].req.queue_count;
+	}
+	if (node_committed < vf->num_queue_pairs) {
+		PMD_DRV_LOG(ERR, "queue nodes are fewer than the allocated queue pairs");
+		rte_free(q_tc_mapping);
+		return IAVF_ERR_PARAM;
+	}
+
+	ret = iavf_set_q_tc_map(dev, q_tc_mapping, size);
+	rte_free(q_tc_mapping);
+	if (ret)
+		return ret;
+
+	vf->tm_conf.committed = true;
+	return IAVF_SUCCESS;
+}
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index 5d57e8b541..daa1b3755c 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -467,7 +467,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 		VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 		VIRTCHNL_VF_OFFLOAD_CRC |
 		VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-		VIRTCHNL_VF_LARGE_NUM_QPAIRS;
+		VIRTCHNL_VF_LARGE_NUM_QPAIRS |
+		VIRTCHNL_VF_OFFLOAD_TC;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -1550,6 +1551,59 @@ iavf_set_hena(struct iavf_adapter *adapter, uint64_t hena)
 	return err;
 }
 
+int
+iavf_get_qos_cap(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	uint32_t len;
+	int err;
+
+	args.ops = VIRTCHNL_OP_GET_QOS_CAPS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+	err = iavf_execute_vf_cmd(adapter, &args);
+
+	if (err) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command VIRTCHNL_OP_GET_QOS_CAPS");
+		return -1;
+	}
+
+	len = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+
+	rte_memcpy(vf->qos_cap, args.out_buffer,
+		   RTE_MIN(args.out_size, len));
+
+	return 0;
+}
+
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+		struct virtchnl_queue_tc_mapping *q_tc_mapping, uint16_t size)
+{
+	struct iavf_adapter *adapter =
+			IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_cmd_info args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.ops = VIRTCHNL_OP_CONFIG_TC_MAP;
+	args.in_args = (uint8_t *)q_tc_mapping;
+	args.in_args_size = size;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	err = iavf_execute_vf_cmd(adapter, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "Failed to execute command of"
+			    " VIRTCHNL_OP_CONFIG_TC_MAP");
+	return err;
+}
+
 int
 iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			struct rte_ether_addr *mc_addrs,
diff --git a/drivers/net/iavf/meson.build b/drivers/net/iavf/meson.build
index 6f222a9e87..f2010a8337 100644
--- a/drivers/net/iavf/meson.build
+++ b/drivers/net/iavf/meson.build
@@ -19,6 +19,7 @@ sources = files(
         'iavf_generic_flow.c',
         'iavf_fdir.c',
         'iavf_hash.c',
+        'iavf_tm.c',
 )
 
 if arch_subdir == 'x86'
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v2 0/5] Enable ETS-based Tx QoS for VF in DCF
  2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                   ` (4 preceding siblings ...)
  2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 5/5] net/iavf: query QoS cap and set queue TC mapping Ting Xu
@ 2021-06-17 10:17 ` Ting Xu
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 1/5] common/iavf: support ETS-based QoS offload configuration Ting Xu
                     ` (4 more replies)
  2021-06-25  9:31 ` [dpdk-dev] [PATCH v3 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                   ` (3 subsequent siblings)
  9 siblings, 5 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-17 10:17 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch set enables ETS-based Tx QoS for IAVF. A kernel tool is used
to configure ETS first. DCF is used to set the bandwidth limit for the
VFs of each TC. IAVF can query the QoS capability and set the queue TC
mapping. The Traffic Management API is used to configure the QoS
hierarchy scheduler tree, which is then passed to hardware to enable
all of the above functions.
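
As a quick orientation (an illustrative sketch, not part of the patch
set), an application would drive the VF side of this through the
generic rte_tm API roughly as follows. The helper name and the node
ids other than the queue ids are hypothetical; queue (leaf) node ids
must equal the queue ids:

#include <string.h>
#include <rte_tm.h>

#define APP_ROOT_NODE_ID 1000 /* arbitrary unused id */
#define APP_TC0_NODE_ID   900 /* arbitrary unused id */

static int
app_setup_vf_qos(uint16_t port_id, uint16_t nb_queues)
{
	struct rte_tm_node_params np;
	struct rte_tm_error err;
	uint16_t q;
	int ret;

	/* level 0: the port (root) node */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
	np.nonleaf.n_sp_priorities = 1;
	ret = rte_tm_node_add(port_id, APP_ROOT_NODE_ID,
			      RTE_TM_NODE_ID_NULL, 0, 1, 0, &np, &err);
	if (ret)
		return ret;

	/* level 1: a single TC node under the port */
	ret = rte_tm_node_add(port_id, APP_TC0_NODE_ID, APP_ROOT_NODE_ID,
			      0, 1, 1, &np, &err);
	if (ret)
		return ret;

	/* level 2: one leaf node per Tx queue, node id == queue id */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
	np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
	for (q = 0; q < nb_queues; q++) {
		ret = rte_tm_node_add(port_id, q, APP_TC0_NODE_ID,
				      0, 1, 2, &np, &err);
		if (ret)
			return ret;
	}

	/* hand the whole tree over to the PMD and hardware */
	return rte_tm_hierarchy_commit(port_id, 1, &err);
}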

Ting Xu (5):
  common/iavf: support ETS-based QoS offload configuration
  net/ice/base: support DCF query port ETS adminq
  net/ice: support DCF link status event handling
  net/ice: support QoS config VF bandwidth in DCF
  net/iavf: query QoS cap and set queue TC mapping

 drivers/common/iavf/iavf_type.h  |   2 +
 drivers/common/iavf/virtchnl.h   | 125 ++++++
 drivers/net/iavf/iavf.h          |  45 ++
 drivers/net/iavf/iavf_ethdev.c   |  31 ++
 drivers/net/iavf/iavf_tm.c       | 663 +++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c    |  56 ++-
 drivers/net/iavf/meson.build     |   1 +
 drivers/net/ice/base/ice_dcb.c   |   3 +-
 drivers/net/ice/ice_dcf.c        |   6 +-
 drivers/net/ice/ice_dcf.h        |  53 +++
 drivers/net/ice/ice_dcf_ethdev.c |  67 ++-
 drivers/net/ice/ice_dcf_ethdev.h |   3 +
 drivers/net/ice/ice_dcf_parent.c |  81 ++++
 drivers/net/ice/ice_dcf_sched.c  | 688 +++++++++++++++++++++++++++++++
 drivers/net/ice/meson.build      |   3 +-
 15 files changed, 1820 insertions(+), 7 deletions(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v2 1/5] common/iavf: support ETS-based QoS offload configuration
  2021-06-17 10:17 ` [dpdk-dev] [PATCH v2 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
@ 2021-06-17 10:17   ` Ting Xu
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 2/5] net/ice/base: support DCF query port ETS adminq Ting Xu
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-17 10:17 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch adds new virtchnl opcodes and structures for QoS
configuration, which include:
1. VIRTCHNL_VF_OFFLOAD_TC, to negotiate the capability for QoS
configuration. If both the VF and the PF have this flag set, the
ETS-based QoS offload function is supported.
2. VIRTCHNL_OP_DCF_CONFIG_BW, with which DCF configures the min and max
bandwidth of each VF per enabled TC. To make the VSI node bandwidth
configuration work, DCF also needs to configure the TC node bandwidth
directly.
3. VIRTCHNL_OP_GET_QOS_CAPS, with which the VF queries the current QoS
configuration, such as the enabled TCs, arbiter type, up2tc and VSI
node bandwidth. This configuration was previously set by DCB and DCF,
and now represents the potential QoS capability of the VF; the VF can
take it as a reference when configuring the queue TC mapping.
4. VIRTCHNL_OP_CONFIG_TC_MAP, to set the VF queue-to-TC mapping for all
Tx and Rx queues. Queues mapped to one TC must be contiguous, and all
allocated queues must be mapped. Both new indirect messages carry a
variable-length element array (see the sizing sketch below).
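
Both variable-length messages declare a one-element trailing array, so
the wire size is the base struct plus (n - 1) extra entries. A
hypothetical sender-side sizing sketch, mirroring the length check in
virtchnl_vc_validate_vf_msg() below (the helper name is illustrative,
and virtchnl.h is assumed to be on the include path):

#include <rte_malloc.h>

static struct virtchnl_queue_tc_mapping *
app_alloc_q_tc_map(uint16_t vsi_id, uint16_t num_tc, uint16_t *msg_size)
{
	struct virtchnl_queue_tc_mapping *map;
	uint16_t size;

	/* the base struct already contains tc[0]; add the rest */
	size = sizeof(*map) + (num_tc - 1) * sizeof(map->tc[0]);
	map = rte_zmalloc("app_q_tc_map", size, 0);
	if (map == NULL)
		return NULL;

	map->vsi_id = vsi_id;
	map->num_tc = num_tc;
	*msg_size = size; /* pass this as the virtchnl message length */
	return map;
}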

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/common/iavf/iavf_type.h |   2 +
 drivers/common/iavf/virtchnl.h  | 125 ++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/drivers/common/iavf/iavf_type.h b/drivers/common/iavf/iavf_type.h
index f3815d523b..73dfb47e70 100644
--- a/drivers/common/iavf/iavf_type.h
+++ b/drivers/common/iavf/iavf_type.h
@@ -141,6 +141,8 @@ enum iavf_debug_mask {
 #define IAVF_PHY_LED_MODE_MASK			0xFFFF
 #define IAVF_PHY_LED_MODE_ORIG			0x80000000
 
+#define IAVF_MAX_TRAFFIC_CLASS	8
+
 /* Memory types */
 enum iavf_memset_type {
 	IAVF_NONDMA_MEM = 0,
diff --git a/drivers/common/iavf/virtchnl.h b/drivers/common/iavf/virtchnl.h
index 3a60faff93..a56f8b4589 100644
--- a/drivers/common/iavf/virtchnl.h
+++ b/drivers/common/iavf/virtchnl.h
@@ -85,6 +85,10 @@ enum virtchnl_rx_hsplit {
 	VIRTCHNL_RX_HSPLIT_SPLIT_SCTP    = 8,
 };
 
+enum virtchnl_bw_limit_type {
+	VIRTCHNL_BW_SHAPER = 0,
+};
+
 #define VIRTCHNL_ETH_LENGTH_OF_ADDRESS	6
 /* END GENERIC DEFINES */
 
@@ -130,6 +134,7 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
 	VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
 	/* opcodes 34, 35, 36, and 37 are reserved */
+	VIRTCHNL_OP_DCF_CONFIG_BW = 37,
 	VIRTCHNL_OP_DCF_VLAN_OFFLOAD = 38,
 	VIRTCHNL_OP_DCF_CMD_DESC = 39,
 	VIRTCHNL_OP_DCF_CMD_BUFF = 40,
@@ -152,6 +157,8 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57,
 	VIRTCHNL_OP_ENABLE_VLAN_FILTERING_V2 = 58,
 	VIRTCHNL_OP_DISABLE_VLAN_FILTERING_V2 = 59,
+	VIRTCHNL_OP_GET_QOS_CAPS = 66,
+	VIRTCHNL_OP_CONFIG_TC_MAP = 67,
 	VIRTCHNL_OP_ENABLE_QUEUES_V2 = 107,
 	VIRTCHNL_OP_DISABLE_QUEUES_V2 = 108,
 	VIRTCHNL_OP_MAP_QUEUE_VECTOR = 111,
@@ -398,6 +405,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC	BIT(26)
 #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF		BIT(27)
 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF		BIT(28)
+#define VIRTCHNL_VF_OFFLOAD_TC		BIT(29)
 #define VIRTCHNL_VF_CAP_DCF			BIT(30)
 	/* BIT(31) is reserved */
 
@@ -1285,6 +1293,13 @@ struct virtchnl_filter {
 
 VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
 
+struct virtchnl_shaper_bw {
+	u32 committed;
+	u32 peak;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw);
+
 /* VIRTCHNL_OP_DCF_GET_VSI_MAP
  * VF sends this message to get VSI mapping table.
  * PF responds with an indirect message containing VF's
@@ -1357,6 +1372,32 @@ struct virtchnl_dcf_vlan_offload {
 
 VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_dcf_vlan_offload);
 
+/* VIRTCHNL_OP_DCF_CONFIG_BW
+ * VF sends this message to set the bandwidth configuration of each
+ * TC with a specific vf id. If vf id is 0xffff, it is used to configure
+ * TC node bandwidth directly.
+ */
+struct virtchnl_dcf_bw_cfg {
+	u8 tc_id;
+	u8 pad[3];
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_dcf_bw_cfg);
+
+struct virtchnl_dcf_bw_cfg_list {
+#define VIRTCHNL_DCF_TC_LEVEL 0xffff
+	u16 vf_id;
+	u16 num_elem;
+	struct virtchnl_dcf_bw_cfg cfg[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_dcf_bw_cfg_list);
+
 struct virtchnl_supported_rxdids {
 	/* see enum virtchnl_rx_desc_id_bitmasks */
 	u64 supported_rxdids;
@@ -1767,6 +1808,62 @@ struct virtchnl_fdir_del {
 
 VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
 
+/* VIRTCHNL_OP_GET_QOS_CAPS
+ * VF sends this message to get its QoS Caps, such as
+ * TC number, Arbiter and Bandwidth.
+ */
+struct virtchnl_qos_cap_elem {
+	u8 tc_id;
+	u8 prio_of_tc;
+#define VIRTCHNL_ABITER_STRICT      0
+#define VIRTCHNL_ABITER_ETS         2
+	u8 arbiter;
+#define VIRTCHNL_STRICT_WEIGHT      1
+	u8 weight;
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem);
+
+struct virtchnl_qos_cap_list {
+	u16 vsi_id;
+	u16 num_elem;
+	struct virtchnl_qos_cap_elem cap[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_qos_cap_list);
+
+/* VIRTCHNL_OP_CONFIG_TC_MAP
+ * VF sends the virtchnl_queue_tc_mapping message to set the queue to TC
+ * mapping for all the Tx and Rx queues of a specified VSI, and gets a
+ * response with the bitmap of valid user priorities associated with
+ * the queues.
+ */
+struct virtchnl_queue_tc_mapping {
+	u16 vsi_id;
+	u16 num_tc;
+	u16 num_queue_pairs;
+	u8 pad[2];
+	union {
+		struct {
+			u16 start_queue_id;
+			u16 queue_count;
+		} req;
+		struct {
+#define VIRTCHNL_USER_PRIO_TYPE_UP	0
+#define VIRTCHNL_USER_PRIO_TYPE_DSCP	1
+			u16 prio_type;
+			u16 valid_prio_bitmap;
+		} resp;
+	} tc[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_tc_mapping);
+
 /* VIRTCHNL_OP_QUERY_FDIR_FILTER
  * VF sends this request to PF by filling out vsi_id,
  * flow_id and reset_counter. PF will return query_info
@@ -2117,6 +2214,19 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DCF_GET_VSI_MAP:
 	case VIRTCHNL_OP_DCF_GET_PKG_INFO:
 		break;
+	case VIRTCHNL_OP_DCF_CONFIG_BW:
+		valid_len = sizeof(struct virtchnl_dcf_bw_cfg_list);
+		if (msglen >= valid_len) {
+			struct virtchnl_dcf_bw_cfg_list *cfg_list =
+				(struct virtchnl_dcf_bw_cfg_list *)msg;
+			if (cfg_list->num_elem == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (cfg_list->num_elem - 1) *
+					 sizeof(struct virtchnl_dcf_bw_cfg);
+		}
+		break;
 	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		break;
 	case VIRTCHNL_OP_ADD_RSS_CFG:
@@ -2132,6 +2242,21 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_QUERY_FDIR_FILTER:
 		valid_len = sizeof(struct virtchnl_fdir_query);
 		break;
+	case VIRTCHNL_OP_GET_QOS_CAPS:
+		break;
+	case VIRTCHNL_OP_CONFIG_TC_MAP:
+		valid_len = sizeof(struct virtchnl_queue_tc_mapping);
+		if (msglen >= valid_len) {
+			struct virtchnl_queue_tc_mapping *q_tc =
+				(struct virtchnl_queue_tc_mapping *)msg;
+			if (q_tc->num_tc == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (q_tc->num_tc - 1) *
+					 sizeof(q_tc->tc[0]);
+		}
+		break;
 	case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
 		break;
 	case VIRTCHNL_OP_ADD_VLAN_V2:
-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v2 2/5] net/ice/base: support DCF query port ETS adminq
  2021-06-17 10:17 ` [dpdk-dev] [PATCH v2 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 1/5] common/iavf: support ETS-based QoS offload configuration Ting Xu
@ 2021-06-17 10:17   ` Ting Xu
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 3/5] net/ice: support DCF link status event handling Ting Xu
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-17 10:17 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

The admin queue command that queries port ETS needs the root node TEID.
For DCF, however, the root node is not initialized, so dereferencing it
would cause an error. Check whether the root node is available before
referencing it.

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/base/ice_dcb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ice/base/ice_dcb.c b/drivers/net/ice/base/ice_dcb.c
index 0aaa5ae8c1..08c950cd9a 100644
--- a/drivers/net/ice/base/ice_dcb.c
+++ b/drivers/net/ice/base/ice_dcb.c
@@ -1483,7 +1483,8 @@ ice_aq_query_port_ets(struct ice_port_info *pi,
 		return ICE_ERR_PARAM;
 	cmd = &desc.params.port_ets;
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
-	cmd->port_teid = pi->root->info.node_teid;
+	if (pi->root)
+		cmd->port_teid = pi->root->info.node_teid;
 
 	status = ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
 	return status;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v2 3/5] net/ice: support DCF link status event handling
  2021-06-17 10:17 ` [dpdk-dev] [PATCH v2 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 1/5] common/iavf: support ETS-based QoS offload configuration Ting Xu
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 2/5] net/ice/base: support DCF query port ETS adminq Ting Xu
@ 2021-06-17 10:17   ` Ting Xu
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 4/5] net/ice: support QoS config VF bandwidth in DCF Ting Xu
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 5/5] net/iavf: query QoS cap and set queue TC mapping Ting Xu
  4 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-17 10:17 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

When the link status changes, DCF receives a virtchnl PF event message.
Add support to handle this event, update the link status and refresh
the link info.
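
For reference, an application consumes the resulting LSC events through
the standard ethdev callback mechanism (with dev_conf.intr_conf.lsc set
at configure time). A minimal hypothetical sketch; the callback name is
illustrative:

#include <stdio.h>
#include <rte_ethdev.h>

static int
app_lsc_callback(uint16_t port_id, enum rte_eth_event_type type,
		 void *cb_arg, void *ret_param)
{
	struct rte_eth_link link;

	RTE_SET_USED(type);
	RTE_SET_USED(cb_arg);
	RTE_SET_USED(ret_param);

	rte_eth_link_get_nowait(port_id, &link);
	printf("port %u link %s, speed %u Mbps\n", port_id,
	       link.link_status ? "up" : "down", link.link_speed);
	return 0;
}

/* registered once at init time:
 *	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *				      app_lsc_callback, NULL);
 */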

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.h        |  6 ++++
 drivers/net/ice/ice_dcf_ethdev.c | 54 ++++++++++++++++++++++++++++++--
 drivers/net/ice/ice_dcf_parent.c | 51 ++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 0cb90b5e9f..587093b909 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -60,6 +60,10 @@ struct ice_dcf_hw {
 	uint16_t nb_msix;
 	uint16_t rxq_map[16];
 	struct virtchnl_eth_stats eth_stats_offset;
+
+	/* Link status */
+	bool link_up;
+	uint32_t link_speed;
 };
 
 int ice_dcf_execute_virtchnl_cmd(struct ice_dcf_hw *hw,
@@ -77,5 +81,7 @@ int ice_dcf_disable_queues(struct ice_dcf_hw *hw);
 int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 			struct virtchnl_eth_stats *pstats);
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
+int ice_dcf_link_update(struct rte_eth_dev *dev,
+		    __rte_unused int wait_to_complete);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index b937cbbb03..819c671c2d 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -880,11 +880,59 @@ ice_dcf_dev_close(struct rte_eth_dev *dev)
 	return 0;
 }
 
-static int
-ice_dcf_link_update(__rte_unused struct rte_eth_dev *dev,
+int
+ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete)
 {
-	return 0;
+	struct ice_dcf_adapter *ad = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &ad->real_hw;
+	struct rte_eth_link new_link;
+
+	memset(&new_link, 0, sizeof(new_link));
+
+	/* Only read the status info stored in the VF; the info is updated
+	 * when a LINK_CHANGE event is received from the PF via virtchnl.
+	 */
+	switch (hw->link_speed) {
+	case 10:
+		new_link.link_speed = ETH_SPEED_NUM_10M;
+		break;
+	case 100:
+		new_link.link_speed = ETH_SPEED_NUM_100M;
+		break;
+	case 1000:
+		new_link.link_speed = ETH_SPEED_NUM_1G;
+		break;
+	case 10000:
+		new_link.link_speed = ETH_SPEED_NUM_10G;
+		break;
+	case 20000:
+		new_link.link_speed = ETH_SPEED_NUM_20G;
+		break;
+	case 25000:
+		new_link.link_speed = ETH_SPEED_NUM_25G;
+		break;
+	case 40000:
+		new_link.link_speed = ETH_SPEED_NUM_40G;
+		break;
+	case 50000:
+		new_link.link_speed = ETH_SPEED_NUM_50G;
+		break;
+	case 100000:
+		new_link.link_speed = ETH_SPEED_NUM_100G;
+		break;
+	default:
+		new_link.link_speed = ETH_SPEED_NUM_NONE;
+		break;
+	}
+
+	new_link.link_duplex = ETH_LINK_FULL_DUPLEX;
+	new_link.link_status = hw->link_up ? ETH_LINK_UP :
+					     ETH_LINK_DOWN;
+	new_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+				ETH_LINK_SPEED_FIXED);
+
+	return rte_eth_linkstatus_set(dev, &new_link);
 }
 
 /* Add UDP tunneling port */
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 1d7aa8bc87..0c0706316d 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -178,6 +178,44 @@ start_vsi_reset_thread(struct ice_dcf_hw *dcf_hw, bool vfr, uint16_t vf_id)
 	}
 }
 
+static uint32_t
+ice_dcf_convert_link_speed(enum virtchnl_link_speed virt_link_speed)
+{
+	uint32_t speed;
+
+	switch (virt_link_speed) {
+	case VIRTCHNL_LINK_SPEED_100MB:
+		speed = 100;
+		break;
+	case VIRTCHNL_LINK_SPEED_1GB:
+		speed = 1000;
+		break;
+	case VIRTCHNL_LINK_SPEED_10GB:
+		speed = 10000;
+		break;
+	case VIRTCHNL_LINK_SPEED_40GB:
+		speed = 40000;
+		break;
+	case VIRTCHNL_LINK_SPEED_20GB:
+		speed = 20000;
+		break;
+	case VIRTCHNL_LINK_SPEED_25GB:
+		speed = 25000;
+		break;
+	case VIRTCHNL_LINK_SPEED_2_5GB:
+		speed = 2500;
+		break;
+	case VIRTCHNL_LINK_SPEED_5GB:
+		speed = 5000;
+		break;
+	default:
+		speed = 0;
+		break;
+	}
+
+	return speed;
+}
+
 void
 ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 			    uint8_t *msg, uint16_t msglen)
@@ -196,6 +234,19 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 		break;
 	case VIRTCHNL_EVENT_LINK_CHANGE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event");
+		dcf_hw->link_up = pf_msg->event_data.link_event.link_status;
+		if (dcf_hw->vf_res->vf_cap_flags &
+			VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+			dcf_hw->link_speed =
+				pf_msg->event_data.link_event_adv.link_speed;
+		} else {
+			enum virtchnl_link_speed speed;
+			speed = pf_msg->event_data.link_event.link_speed;
+			dcf_hw->link_speed = ice_dcf_convert_link_speed(speed);
+		}
+		ice_dcf_link_update(dcf_hw->eth_dev, 0);
+		rte_eth_dev_callback_process(dcf_hw->eth_dev,
+			RTE_ETH_EVENT_INTR_LSC, NULL);
 		break;
 	case VIRTCHNL_EVENT_PF_DRIVER_CLOSE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event");
-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v2 4/5] net/ice: support QoS config VF bandwidth in DCF
  2021-06-17 10:17 ` [dpdk-dev] [PATCH v2 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (2 preceding siblings ...)
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 3/5] net/ice: support DCF link status event handling Ting Xu
@ 2021-06-17 10:17   ` Ting Xu
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 5/5] net/iavf: query QoS cap and set queue TC mapping Ting Xu
  4 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-17 10:17 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch supports the ETS-based QoS configuration. It enables the DCF
to configure bandwidth limits for each VF VSI on the different TCs. A
hierarchical scheduler tree is built with port, TC and VSI nodes.
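
To put numbers on the bandwidth limits (a hypothetical sketch, not part
of the patch): rte_tm shaper rates are expressed in bytes per second,
and the DCF PMD converts them to kbps (rate / 1000 * 8) before sending
VIRTCHNL_OP_DCF_CONFIG_BW, so a 500 Mbps cap is passed in as
62500000 B/s. The helper name and profile id below are illustrative:

#include <string.h>
#include <rte_tm.h>

static int
app_add_vsi_shaper(uint16_t port_id)
{
	struct rte_tm_shaper_params sp;
	struct rte_tm_error err;

	memset(&sp, 0, sizeof(sp));
	sp.committed.rate = 100 * 1000 * 1000 / 8; /* 100 Mbps CIR, B/s */
	sp.peak.rate = 500 * 1000 * 1000 / 8;      /* 500 Mbps PIR, B/s */
	/* bucket sizes and pkt_length_adjust must stay 0 for this PMD */

	/* profile id 1 is then referenced from the VSI node's
	 * rte_tm_node_params.shaper_profile_id before hierarchy commit
	 */
	return rte_tm_shaper_profile_add(port_id, 1, &sp, &err);
}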

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.c        |   6 +-
 drivers/net/ice/ice_dcf.h        |  47 +++
 drivers/net/ice/ice_dcf_ethdev.c |  13 +
 drivers/net/ice/ice_dcf_ethdev.h |   3 +
 drivers/net/ice/ice_dcf_parent.c |  30 ++
 drivers/net/ice/ice_dcf_sched.c  | 688 +++++++++++++++++++++++++++++++
 drivers/net/ice/meson.build      |   3 +-
 7 files changed, 788 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index d72a6f357e..f8b4e07d86 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -235,7 +235,8 @@ ice_dcf_get_vf_resource(struct ice_dcf_hw *hw)
 	caps = VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | VIRTCHNL_VF_OFFLOAD_RX_POLLING |
 	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED | VIRTCHNL_VF_CAP_DCF |
 	       VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
+	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC |
+	       VIRTCHNL_VF_OFFLOAD_TC;
 
 	err = ice_dcf_send_cmd_req_no_irq(hw, VIRTCHNL_OP_GET_VF_RESOURCES,
 					  (uint8_t *)&caps, sizeof(caps));
@@ -668,6 +669,9 @@ ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 		}
 	}
 
+	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_TC)
+		ice_dcf_tm_conf_init(eth_dev);
+
 	hw->eth_dev = eth_dev;
 	rte_intr_callback_register(&pci_dev->intr_handle,
 				   ice_dcf_dev_interrupt_handler, hw);
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 587093b909..e74e5d7e81 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -6,6 +6,7 @@
 #define _ICE_DCF_H_
 
 #include <ethdev_driver.h>
+#include <rte_tm_driver.h>
 
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
@@ -30,6 +31,49 @@ struct dcf_virtchnl_cmd {
 	volatile int pending;
 };
 
+struct ice_dcf_tm_shaper_profile {
+	TAILQ_ENTRY(ice_dcf_tm_shaper_profile) node;
+	uint32_t shaper_profile_id;
+	uint32_t reference_count;
+	struct rte_tm_shaper_params profile;
+};
+
+TAILQ_HEAD(ice_dcf_shaper_profile_list, ice_dcf_tm_shaper_profile);
+
+/* Struct to store Traffic Manager node configuration. */
+struct ice_dcf_tm_node {
+	TAILQ_ENTRY(ice_dcf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct ice_dcf_tm_node *parent;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(ice_dcf_tm_node_list, ice_dcf_tm_node);
+
+/* node type of Traffic Manager */
+enum ice_dcf_tm_node_type {
+	ICE_DCF_TM_NODE_TYPE_PORT,
+	ICE_DCF_TM_NODE_TYPE_TC,
+	ICE_DCF_TM_NODE_TYPE_VSI,
+	ICE_DCF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct ice_dcf_tm_conf {
+	struct ice_dcf_shaper_profile_list shaper_profile_list;
+	struct ice_dcf_tm_node *root; /* root node - port */
+	struct ice_dcf_tm_node_list tc_list; /* node list for all the TCs */
+	struct ice_dcf_tm_node_list vsi_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_vsi_node;
+	bool committed;
+};
+
 struct ice_dcf_hw {
 	struct iavf_hw avf;
 
@@ -45,6 +89,8 @@ struct ice_dcf_hw {
 	uint16_t *vf_vsi_map;
 	uint16_t pf_vsi_id;
 
+	struct ice_dcf_tm_conf tm_conf;
+	struct ice_aqc_port_ets_elem *ets_config;
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
@@ -83,5 +129,6 @@ int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
 int ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete);
+void ice_dcf_tm_conf_init(struct rte_eth_dev *dev);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 819c671c2d..219ff7cd86 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -993,6 +993,18 @@ ice_dcf_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
 	return ret;
 }
 
+static int
+ice_dcf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+		void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &ice_dcf_tm_ops;
+
+	return 0;
+}
+
 static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.dev_start               = ice_dcf_dev_start,
 	.dev_stop                = ice_dcf_dev_stop,
@@ -1017,6 +1029,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.flow_ops_get            = ice_dcf_dev_flow_ops_get,
 	.udp_tunnel_port_add	 = ice_dcf_dev_udp_tunnel_port_add,
 	.udp_tunnel_port_del	 = ice_dcf_dev_udp_tunnel_port_del,
+	.tm_ops_get              = ice_dcf_tm_ops_get,
 };
 
 static int
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index e7c9d7fe41..8510e37119 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -7,6 +7,8 @@
 
 #include "base/ice_common.h"
 #include "base/ice_adminq_cmd.h"
+#include "base/ice_dcb.h"
+#include "base/ice_sched.h"
 
 #include "ice_ethdev.h"
 #include "ice_dcf.h"
@@ -52,6 +54,7 @@ struct ice_dcf_vf_repr {
 	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN */
 };
 
+extern const struct rte_tm_ops ice_dcf_tm_ops;
 void ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 				 uint8_t *msg, uint16_t msglen);
 int ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 0c0706316d..2403d9c259 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -264,6 +264,29 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 	}
 }
 
+static int
+ice_dcf_query_port_ets(struct ice_hw *parent_hw, struct ice_dcf_hw *real_hw)
+{
+	int ret;
+
+	real_hw->ets_config = (struct ice_aqc_port_ets_elem *)
+			ice_malloc(real_hw, sizeof(*real_hw->ets_config));
+	if (!real_hw->ets_config)
+		return ICE_ERR_NO_MEMORY;
+
+	ret = ice_aq_query_port_ets(parent_hw->port_info,
+			real_hw->ets_config, sizeof(*real_hw->ets_config),
+			NULL);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "DCF Query Port ETS failed");
+		rte_free(real_hw->ets_config);
+		real_hw->ets_config = NULL;
+		return ret;
+	}
+
+	return ICE_SUCCESS;
+}
+
 static int
 ice_dcf_init_parent_hw(struct ice_hw *hw)
 {
@@ -487,6 +510,13 @@ ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev)
 		return err;
 	}
 
+	err = ice_dcf_query_port_ets(parent_hw, hw);
+	if (err) {
+		PMD_INIT_LOG(ERR, "failed to query port ets with error %d",
+			     err);
+		goto uninit_hw;
+	}
+
 	err = ice_dcf_load_pkg(parent_hw);
 	if (err) {
 		PMD_INIT_LOG(ERR, "failed to load package with error %d",
diff --git a/drivers/net/ice/ice_dcf_sched.c b/drivers/net/ice/ice_dcf_sched.c
new file mode 100644
index 0000000000..0187191539
--- /dev/null
+++ b/drivers/net/ice/ice_dcf_sched.c
@@ -0,0 +1,688 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "base/ice_sched.h"
+#include "ice_dcf_ethdev.h"
+
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+				   uint32_t shaper_profile_id,
+				   struct rte_tm_error *error);
+
+const struct rte_tm_ops ice_dcf_tm_ops = {
+	.shaper_profile_add = ice_dcf_shaper_profile_add,
+	.shaper_profile_delete = ice_dcf_shaper_profile_del,
+	.hierarchy_commit = ice_dcf_hierarchy_commit,
+	.node_add = ice_dcf_node_add,
+	.node_delete = ice_dcf_node_delete,
+};
+
+void
+ice_dcf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+
+	/* initialize shaper profile list */
+	TAILQ_INIT(&hw->tm_conf.shaper_profile_list);
+
+	/* initialize node configuration */
+	hw->tm_conf.root = NULL;
+	TAILQ_INIT(&hw->tm_conf.tc_list);
+	TAILQ_INIT(&hw->tm_conf.vsi_list);
+	hw->tm_conf.nb_tc_node = 0;
+	hw->tm_conf.nb_vsi_node = 0;
+	hw->tm_conf.committed = false;
+}
+
+static inline struct ice_dcf_tm_node *
+dcf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum ice_dcf_tm_node_type *node_type)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct ice_dcf_tm_node_list *tc_list = &hw->tm_conf.tc_list;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (hw->tm_conf.root && hw->tm_conf.root->id == node_id) {
+		*node_type = ICE_DCF_TM_NODE_TYPE_PORT;
+		return hw->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, vsi_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_VSI;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static inline struct ice_dcf_tm_shaper_profile *
+dcf_shaper_profile_search(struct rte_eth_dev *dev,
+			   uint32_t shaper_profile_id)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_shaper_profile_list *shaper_profile_list =
+		&hw->tm_conf.shaper_profile_list;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	TAILQ_FOREACH(shaper_profile, shaper_profile_list, node) {
+		if (shaper_profile_id == shaper_profile->shaper_profile_id)
+			return shaper_profile;
+	}
+
+	return NULL;
+}
+
+static int
+dcf_node_param_check(struct ice_dcf_hw *hw, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* not support shared shaper */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= ICE_MAX_TRAFFIC_CLASS * hw->num_vfs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type parent_node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_tm_shaper_profile *shaper_profile = NULL;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *parent_node;
+	struct ice_dcf_tm_node *tm_node;
+	uint16_t tc_nb = 1;
+	int i, ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = dcf_node_param_check(hw, node_id, priority, weight,
+				   params, error);
+	if (ret)
+		return ret;
+
+	for (i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		if (hw->ets_config->tc_valid_bits & (1 << i))
+			tc_nb++;
+	}
+
+	/* check if the node id is already in use */
+	if (dcf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* check the shaper profile id */
+	if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE) {
+		shaper_profile = dcf_shaper_profile_search(dev,
+			params->shaper_profile_id);
+		if (!shaper_profile) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+			error->message = "shaper profile not exist";
+			return -EINVAL;
+		}
+	}
+
+	/* add the root node if it has no parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != ICE_DCF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (hw->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("ice_dcf_tm_node",
+				      sizeof(struct ice_dcf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		hw->tm_conf.root = tm_node;
+
+		return 0;
+	}
+
+	/* TC or vsi node */
+	/* check the parent node */
+	parent_node = dcf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != ICE_DCF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != ICE_DCF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not port or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the TC node number */
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (hw->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the vsi node number */
+		if (parent_node->reference_count >= hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many VSI for one TC";
+			return -EINVAL;
+		}
+		/* check the vsi node id */
+		if (node_id > tc_nb * hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large VSI id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or vsi node */
+	tm_node = rte_zmalloc("ice_dcf_tm_node",
+			      sizeof(struct ice_dcf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->priority = priority;
+	tm_node->weight = weight;
+	tm_node->shaper_profile = shaper_profile;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = hw->tm_conf.nb_tc_node;
+		hw->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.vsi_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		hw->tm_conf.nb_vsi_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	/* increase the reference counter of the shaper profile */
+	if (shaper_profile)
+		shaper_profile->reference_count++;
+
+	return 0;
+}
+
+static int
+ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = dcf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		if (tm_node->shaper_profile)
+			tm_node->shaper_profile->reference_count--;
+		rte_free(tm_node);
+		hw->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or VSI node */
+	if (tm_node->shaper_profile)
+		tm_node->shaper_profile->reference_count--;
+	tm_node->parent->reference_count--;
+	if (node_type == ICE_DCF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&hw->tm_conf.tc_list, tm_node, node);
+		hw->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&hw->tm_conf.vsi_list, tm_node, node);
+		hw->tm_conf.nb_vsi_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+dcf_shaper_profile_param_check(struct rte_tm_shaper_params *profile,
+				struct rte_tm_error *error)
+{
+	/* min bucket size not supported */
+	if (profile->committed.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE;
+		error->message = "committed bucket size not supported";
+		return -EINVAL;
+	}
+	/* max bucket size not supported */
+	if (profile->peak.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE;
+		error->message = "peak bucket size not supported";
+		return -EINVAL;
+	}
+	/* length adjustment not supported */
+	if (profile->pkt_length_adjust) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN;
+		error->message = "packet length adjustment not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	int ret;
+
+	if (!profile || !error)
+		return -EINVAL;
+
+	ret = dcf_shaper_profile_param_check(profile, error);
+	if (ret)
+		return ret;
+
+	shaper_profile = dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID exist";
+		return -EINVAL;
+	}
+
+	shaper_profile = rte_zmalloc("ice_dcf_tm_shaper_profile",
+				     sizeof(struct ice_dcf_tm_shaper_profile),
+				     0);
+	if (!shaper_profile)
+		return -ENOMEM;
+	shaper_profile->shaper_profile_id = shaper_profile_id;
+	rte_memcpy(&shaper_profile->profile, profile,
+			 sizeof(struct rte_tm_shaper_params));
+	TAILQ_INSERT_TAIL(&hw->tm_conf.shaper_profile_list,
+			  shaper_profile, node);
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	if (!error)
+		return -EINVAL;
+
+	shaper_profile = dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (!shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID not exist";
+		return -EINVAL;
+	}
+
+	/* don't delete a profile if it's used by one or several nodes */
+	if (shaper_profile->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE;
+		error->message = "profile in use";
+		return -EINVAL;
+	}
+
+	TAILQ_REMOVE(&hw->tm_conf.shaper_profile_list, shaper_profile, node);
+	rte_free(shaper_profile);
+
+	return 0;
+}
+
+static int
+ice_dcf_set_vf_bw(struct ice_dcf_hw *hw,
+			struct virtchnl_dcf_bw_cfg_list *vf_bw,
+			uint16_t len)
+{
+	struct dcf_virtchnl_cmd args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.v_op = VIRTCHNL_OP_DCF_CONFIG_BW;
+	args.req_msg = (uint8_t *)vf_bw;
+	args.req_msglen = len;
+	err = ice_dcf_execute_virtchnl_cmd(hw, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "fail to execute command %s",
+			    "VIRTCHNL_OP_DCF_CONFIG_VF_TC");
+	return err;
+}
+
+static int
+ice_dcf_validate_tc_bw(struct virtchnl_dcf_bw_cfg_list *tc_bw,
+			uint32_t port_bw)
+{
+	struct virtchnl_dcf_bw_cfg *cfg;
+	bool lowest_cir_mark = false;
+	u32 total_peak, rest_peak;
+	u32 committed, peak;
+	int i;
+
+	total_peak = 0;
+	for (i = 0; i < tc_bw->num_elem; i++)
+		total_peak += tc_bw->cfg[i].shaper.peak;
+
+	for (i = 0; i < tc_bw->num_elem; i++) {
+		cfg = &tc_bw->cfg[i];
+		peak = cfg->shaper.peak;
+		committed = cfg->shaper.committed;
+		rest_peak = total_peak - peak;
+
+		if (lowest_cir_mark && peak == 0) {
+			PMD_DRV_LOG(ERR, "Max bandwidth must be configured for TC%u \n",
+				cfg->tc_id);
+			return -EINVAL;
+		}
+
+		if (!lowest_cir_mark && committed)
+			lowest_cir_mark = true;
+
+		if (committed && committed + rest_peak > port_bw) {
+			PMD_DRV_LOG(ERR, "Total value of TC%u min bandwidth and other TCs' max bandwidth %ukbps should be less than port link speed %ukbps \n",
+				cfg->tc_id, committed + rest_peak, port_bw);
+			return -EINVAL;
+		}
+
+		if (committed && committed < ICE_SCHED_MIN_BW) {
+			PMD_DRV_LOG(ERR, "If TC%u min Tx bandwidth is set, it cannot be less than 500Kbps \n",
+				cfg->tc_id);
+			return -EINVAL;
+		}
+
+		if (peak && committed > peak) {
+			PMD_DRV_LOG(ERR, "TC%u Min Tx bandwidth cannot be greater than max Tx bandwidth \n",
+				cfg->tc_id);
+			return -EINVAL;
+		}
+
+		if (peak > port_bw) {
+			PMD_DRV_LOG(ERR, "TC%u max Tx bandwidth %uKbps is greater than current link speed %uKbps \n",
+				cfg->tc_id, peak, port_bw);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct virtchnl_dcf_bw_cfg_list *vf_bw;
+	struct virtchnl_dcf_bw_cfg_list *tc_bw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct rte_tm_shaper_params *profile;
+	struct ice_dcf_tm_node *tm_node;
+	uint32_t port_bw, cir_total;
+	uint16_t size, vf_id;
+	int num_elem = 0;
+	int ret, i;
+
+	size = sizeof(struct virtchnl_dcf_bw_cfg_list) +
+		sizeof(struct virtchnl_dcf_bw_cfg) *
+		(hw->tm_conf.nb_tc_node - 1);
+	vf_bw = rte_zmalloc("vf_bw", size, 0);
+	if (!vf_bw)
+		return ICE_ERR_NO_MEMORY;
+	tc_bw = rte_zmalloc("tc_bw", size, 0);
+	if (!tc_bw) {
+		rte_free(vf_bw);
+		return ICE_ERR_NO_MEMORY;
+	}
+
+	/* port bandwidth (Kbps) */
+	port_bw = hw->link_speed * 1000;
+	cir_total = 0;
+
+	/* init tc bw configuration */
+	tc_bw->vf_id = VIRTCHNL_DCF_TC_LEVEL;
+	tc_bw->num_elem = hw->tm_conf.nb_tc_node;
+	for (i = 0; i < tc_bw->num_elem; i++) {
+		tc_bw->cfg[i].tc_id = i;
+		tc_bw->cfg[i].type = VIRTCHNL_BW_SHAPER;
+	}
+
+	for (vf_id = 0; vf_id < hw->num_vfs; vf_id++) {
+		num_elem = 0;
+		vf_bw->vf_id = vf_id;
+		TAILQ_FOREACH(tm_node, vsi_list, node) {
+			/* scan the nodes belonging to one VSI */
+			if (tm_node->id - hw->num_vfs * tm_node->tc != vf_id)
+				continue;
+			vf_bw->cfg[num_elem].tc_id = tm_node->tc;
+			vf_bw->cfg[num_elem].type = VIRTCHNL_BW_SHAPER;
+			if (tm_node->shaper_profile) {
+				/* Convert from bytes per second to Kbps */
+				profile = &tm_node->shaper_profile->profile;
+				vf_bw->cfg[num_elem].shaper.peak =
+				profile->peak.rate / 1000 * BITS_PER_BYTE;
+				vf_bw->cfg[num_elem].shaper.committed =
+				profile->committed.rate / 1000 * BITS_PER_BYTE;
+			}
+			cir_total += vf_bw->cfg[num_elem].shaper.committed;
+
+			/* update tc node bw configuration */
+			tc_bw->cfg[tm_node->tc].shaper.peak +=
+				vf_bw->cfg[num_elem].shaper.peak;
+			tc_bw->cfg[tm_node->tc].shaper.committed +=
+				vf_bw->cfg[num_elem].shaper.committed;
+			num_elem++;
+		}
+
+		/* check if total CIR is larger than port bandwidth */
+		if (cir_total > port_bw) {
+			PMD_DRV_LOG(ERR, "Total CIR of all VFs is larger than port bandwidth");
+			rte_free(vf_bw);
+			rte_free(tc_bw);
+			return ICE_ERR_PARAM;
+		}
+		vf_bw->num_elem = num_elem;
+		ret = ice_dcf_set_vf_bw(hw, vf_bw, size);
+		if (ret) {
+			rte_free(vf_bw);
+			rte_free(tc_bw);
+			return ret;
+		}
+		memset(vf_bw, 0, size);
+	}
+
+	rte_free(vf_bw);
+
+	/* check and commit tc node bw configuration */
+	ret = ice_dcf_validate_tc_bw(tc_bw, port_bw);
+	if (ret) {
+		rte_free(tc_bw);
+		return ret;
+	}
+	ret = ice_dcf_set_vf_bw(hw, tc_bw, size);
+	rte_free(tc_bw);
+	if (ret)
+		return ret;
+
+	hw->tm_conf.committed = true;
+	return ICE_SUCCESS;
+}
diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build
index 65750d3501..0b86d74a49 100644
--- a/drivers/net/ice/meson.build
+++ b/drivers/net/ice/meson.build
@@ -70,6 +70,7 @@ endif
 sources += files('ice_dcf.c',
          'ice_dcf_vf_representor.c',
          'ice_dcf_ethdev.c',
-         'ice_dcf_parent.c')
+         'ice_dcf_parent.c',
+	 'ice_dcf_sched.c')
 
 headers = files('rte_pmd_ice.h')
-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v2 5/5] net/iavf: query QoS cap and set queue TC mapping
  2021-06-17 10:17 ` [dpdk-dev] [PATCH v2 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (3 preceding siblings ...)
  2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 4/5] net/ice: support QoS config VF bandwidth in DCF Ting Xu
@ 2021-06-17 10:17   ` Ting Xu
  4 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-17 10:17 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch adds support for the VF to configure the ETS-based Tx QoS,
including querying the current QoS configuration from the PF and
configuring the queue TC mapping. PF QoS is configured in advance and
the queried info is provided to the user for future usage. VF queues
are mapped to different TCs in the PF through virtchnl.
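
For illustration only (not part of this patch), a minimal sketch of how
an application could drive these ops through the generic rte_tm API;
the port id, node ids and queue count are made up, and error handling
is omitted:

	struct rte_tm_error tm_err = { 0 };
	struct rte_tm_node_params np = { 0 };
	uint16_t port_id = 0;                  /* hypothetical port */
	uint32_t root = 1000, tc0 = 2000, q, nb_q = 4;

	/* non-leaf nodes: one strict priority, no private shaper */
	np.nonleaf.n_sp_priorities = 1;
	rte_tm_node_add(port_id, root, RTE_TM_NODE_ID_NULL,
			0, 1, 0 /* port level */, &np, &tm_err);
	rte_tm_node_add(port_id, tc0, root,
			0, 1, 1 /* TC level */, &np, &tm_err);

	/* leaf nodes: the node id must be the queue id */
	memset(&np, 0, sizeof(np));
	np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
	for (q = 0; q < nb_q; q++)
		rte_tm_node_add(port_id, q, tc0,
				0, 1, 2 /* queue level */, &np, &tm_err);

	/* sends the queue-to-TC mapping to the PF over virtchnl */
	rte_tm_hierarchy_commit(port_id, 1, &tm_err);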

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/iavf/iavf.h        |  45 +++
 drivers/net/iavf/iavf_ethdev.c |  31 ++
 drivers/net/iavf/iavf_tm.c     | 663 +++++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c  |  56 ++-
 drivers/net/iavf/meson.build   |   1 +
 5 files changed, 795 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 4f5811ae87..77ddf15f42 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -6,6 +6,8 @@
 #define _IAVF_ETHDEV_H_
 
 #include <rte_kvargs.h>
+#include <rte_tm_driver.h>
+
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
 #include <iavf_type.h>
@@ -82,6 +84,8 @@
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
 
+#define IAVF_BITS_PER_BYTE 8
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -129,6 +133,38 @@ enum iavf_aq_result {
 	IAVF_MSG_CMD,      /* Read async command result */
 };
 
+/* Struct to store Traffic Manager node configuration. */
+struct iavf_tm_node {
+	TAILQ_ENTRY(iavf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct iavf_tm_node *parent;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(iavf_tm_node_list, iavf_tm_node);
+
+/* node type of Traffic Manager */
+enum iavf_tm_node_type {
+	IAVF_TM_NODE_TYPE_PORT,
+	IAVF_TM_NODE_TYPE_TC,
+	IAVF_TM_NODE_TYPE_QUEUE,
+	IAVF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct iavf_tm_conf {
+	struct iavf_tm_node *root; /* root node - vf vsi */
+	struct iavf_tm_node_list tc_list; /* node list for all the TCs */
+	struct iavf_tm_node_list queue_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_queue_node;
+	bool committed;
+};
+
 /* Structure to store private data specific for VF instance. */
 struct iavf_info {
 	uint16_t num_queue_pairs;
@@ -175,6 +211,9 @@ struct iavf_info {
 	struct iavf_fdir_info fdir; /* flow director info */
 	/* indicate large VF support enabled or not */
 	bool lv_enabled;
+
+	struct virtchnl_qos_cap_list *qos_cap;
+	struct iavf_tm_conf tm_conf;
 };
 
 #define IAVF_MAX_PKT_TYPE 1024
@@ -344,4 +383,10 @@ int iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			uint32_t mc_addrs_num, bool add);
 int iavf_request_queues(struct iavf_adapter *adapter, uint16_t num);
 int iavf_get_max_rss_queue_region(struct iavf_adapter *adapter);
+int iavf_get_qos_cap(struct iavf_adapter *adapter);
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+			struct virtchnl_queue_tc_mapping *q_tc_mapping,
+			uint16_t size);
+void iavf_tm_conf_init(struct rte_eth_dev *dev);
+extern const struct rte_tm_ops iavf_tm_ops;
 #endif /* _IAVF_ETHDEV_H_ */
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index cb38fe81e1..e0a03a0bee 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -122,6 +122,7 @@ static int iavf_dev_flow_ops_get(struct rte_eth_dev *dev,
 static int iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
 			uint32_t mc_addrs_num);
+static int iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg);
 
 static const struct rte_pci_id pci_id_iavf_map[] = {
 	{ RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_ADAPTIVE_VF) },
@@ -200,8 +201,21 @@ static const struct eth_dev_ops iavf_eth_dev_ops = {
 	.flow_ops_get               = iavf_dev_flow_ops_get,
 	.tx_done_cleanup	    = iavf_dev_tx_done_cleanup,
 	.get_monitor_addr           = iavf_get_monitor_addr,
+	.tm_ops_get                 = iavf_tm_ops_get,
 };
 
+static int
+iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+			void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &iavf_tm_ops;
+
+	return 0;
+}
+
 static int
 iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
@@ -806,6 +820,11 @@ iavf_dev_start(struct rte_eth_dev *dev)
 				      dev->data->nb_tx_queues);
 	num_queue_pairs = vf->num_queue_pairs;
 
+	if (iavf_get_qos_cap(adapter)) {
+		PMD_INIT_LOG(ERR, "Failed to get qos capability");
+		return -1;
+	}
+
 	if (iavf_init_queues(dev) != 0) {
 		PMD_DRV_LOG(ERR, "failed to do Queue init");
 		return -1;
@@ -2090,6 +2109,15 @@ iavf_init_vf(struct rte_eth_dev *dev)
 		PMD_INIT_LOG(ERR, "unable to allocate vf_res memory");
 		goto err_api;
 	}
+
+	bufsz = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+	vf->qos_cap = rte_zmalloc("qos_cap", bufsz, 0);
+	if (!vf->qos_cap) {
+		PMD_INIT_LOG(ERR, "unable to allocate qos_cap memory");
+		goto err_api;
+	}
+
 	if (iavf_get_vf_resource(adapter) != 0) {
 		PMD_INIT_LOG(ERR, "iavf_get_vf_config failed");
 		goto err_alloc;
@@ -2131,6 +2159,7 @@ iavf_init_vf(struct rte_eth_dev *dev)
 	rte_free(vf->rss_key);
 	rte_free(vf->rss_lut);
 err_alloc:
+	rte_free(vf->qos_cap);
 	rte_free(vf->vf_res);
 	vf->vsi_res = NULL;
 err_api:
@@ -2299,6 +2328,8 @@ iavf_dev_init(struct rte_eth_dev *eth_dev)
 
 	iavf_default_rss_disable(adapter);
 
+	iavf_tm_conf_init(eth_dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
new file mode 100644
index 0000000000..5a33ce367e
--- /dev/null
+++ b/drivers/net/iavf/iavf_tm.c
@@ -0,0 +1,663 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "iavf.h"
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error);
+static int iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error);
+static int iavf_node_capabilities_get(struct rte_eth_dev *dev,
+				      uint32_t node_id,
+				      struct rte_tm_node_capabilities *cap,
+				      struct rte_tm_error *error);
+static int iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error);
+
+const struct rte_tm_ops iavf_tm_ops = {
+	.node_add = iavf_tm_node_add,
+	.node_delete = iavf_tm_node_delete,
+	.capabilities_get = iavf_tm_capabilities_get,
+	.level_capabilities_get = iavf_level_capabilities_get,
+	.node_capabilities_get = iavf_node_capabilities_get,
+	.node_type_get = iavf_node_type_get,
+	.hierarchy_commit = iavf_hierarchy_commit,
+};
+
+void
+iavf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	/* initialize node configuration */
+	vf->tm_conf.root = NULL;
+	TAILQ_INIT(&vf->tm_conf.tc_list);
+	TAILQ_INIT(&vf->tm_conf.queue_list);
+	vf->tm_conf.nb_tc_node = 0;
+	vf->tm_conf.nb_queue_node = 0;
+	vf->tm_conf.committed = false;
+}
+
+static inline struct iavf_tm_node *
+iavf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum iavf_tm_node_type *node_type)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_node_list *tc_list = &vf->tm_conf.tc_list;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+
+	if (vf->tm_conf.root && vf->tm_conf.root->id == node_id) {
+		*node_type = IAVF_TM_NODE_TYPE_PORT;
+		return vf->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_QUEUE;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static int
+iavf_node_param_check(struct iavf_info *vf, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* not support shaper profile */
+	if (params->shaper_profile_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+		error->message = "shaper profile not supported";
+		return -EINVAL;
+	}
+
+	/* not support shared shaper */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= vf->num_queue_pairs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error)
+{
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!is_leaf || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type == IAVF_TM_NODE_TYPE_QUEUE)
+		*is_leaf = true;
+	else
+		*is_leaf = false;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	enum iavf_tm_node_type parent_node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+	struct iavf_tm_node *parent_node;
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+	int ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = iavf_node_param_check(vf, node_id, priority, weight,
+				    params, error);
+	if (ret)
+		return ret;
+
+	/* check whether the node already exists */
+	if (iavf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* root node if it does not have a parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != IAVF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (vf->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("iavf_tm_node",
+				      sizeof(struct iavf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		vf->tm_conf.root = tm_node;
+		return 0;
+	}
+
+	/* TC or queue node */
+	/* check the parent node */
+	parent_node = iavf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != IAVF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not root or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the node number */
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (vf->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the queue number */
+		if (parent_node->reference_count >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many queues";
+			return -EINVAL;
+		}
+		if (node_id >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large queue id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or queue node */
+	tm_node = rte_zmalloc("iavf_tm_node",
+			      sizeof(struct iavf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = vf->tm_conf.nb_tc_node;
+		vf->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.queue_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		vf->tm_conf.nb_queue_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == IAVF_TM_NODE_TYPE_PORT) {
+		rte_free(tm_node);
+		vf->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or queue node */
+	tm_node->parent->reference_count--;
+	if (node_type == IAVF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&vf->tm_conf.tc_list, tm_node, node);
+		vf->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&vf->tm_conf.queue_list, tm_node, node);
+		vf->tm_conf.nb_queue_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (tc_nb > vf->vf_res->num_queue_pairs)
+		return -EINVAL;
+
+	error->type = RTE_TM_ERROR_TYPE_NONE;
+
+	/* set all the parameters to 0 first. */
+	memset(cap, 0, sizeof(struct rte_tm_capabilities));
+
+	/**
+	 * support port + TCs + queues
+	 * this shows the max capability, not the current configuration.
+	 */
+	cap->n_nodes_max = 1 + IAVF_MAX_TRAFFIC_CLASS
+		+ vf->num_queue_pairs;
+	cap->n_levels_max = 3; /* port, TC, queue */
+	cap->non_leaf_nodes_identical = 1;
+	cap->leaf_nodes_identical = 1;
+	cap->shaper_n_max = cap->n_nodes_max;
+	cap->shaper_private_n_max = cap->n_nodes_max;
+	cap->shaper_private_dual_rate_n_max = 0;
+	cap->shaper_private_rate_min = 0;
+	/* Kbytes per second */
+	cap->shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->shaper_private_packet_mode_supported = 0;
+	cap->shaper_private_byte_mode_supported = 1;
+	cap->shaper_shared_n_max = 0;
+	cap->shaper_shared_n_nodes_per_shaper_max = 0;
+	cap->shaper_shared_n_shapers_per_node_max = 0;
+	cap->shaper_shared_dual_rate_n_max = 0;
+	cap->shaper_shared_rate_min = 0;
+	cap->shaper_shared_rate_max = 0;
+	cap->shaper_shared_packet_mode_supported = 0;
+	cap->shaper_shared_byte_mode_supported = 0;
+	cap->sched_n_children_max = vf->num_queue_pairs;
+	cap->sched_sp_n_priorities_max = 1;
+	cap->sched_wfq_n_children_per_group_max = 0;
+	cap->sched_wfq_n_groups_max = 0;
+	cap->sched_wfq_weight_max = 1;
+	cap->sched_wfq_packet_mode_supported = 0;
+	cap->sched_wfq_byte_mode_supported = 0;
+	cap->cman_head_drop_supported = 0;
+	cap->dynamic_update_mask = 0;
+	cap->shaper_pkt_length_adjust_min = RTE_TM_ETH_FRAMING_OVERHEAD;
+	cap->shaper_pkt_length_adjust_max = RTE_TM_ETH_FRAMING_OVERHEAD_FCS;
+	cap->cman_wred_context_n_max = 0;
+	cap->cman_wred_context_private_n_max = 0;
+	cap->cman_wred_context_shared_n_max = 0;
+	cap->cman_wred_context_shared_n_nodes_per_context_max = 0;
+	cap->cman_wred_context_shared_n_contexts_per_node_max = 0;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (level_id >= IAVF_TM_NODE_TYPE_MAX) {
+		error->type = RTE_TM_ERROR_TYPE_LEVEL_ID;
+		error->message = "too deep level";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (level_id == IAVF_TM_NODE_TYPE_PORT) {
+		cap->n_nodes_max = 1;
+		cap->n_nodes_nonleaf_max = 1;
+		cap->n_nodes_leaf_max = 0;
+	} else if (level_id == IAVF_TM_NODE_TYPE_TC) {
+		/* TC */
+		cap->n_nodes_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_nonleaf_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_leaf_max = 0;
+	} else {
+		/* queue */
+		cap->n_nodes_max = vf->num_queue_pairs;
+		cap->n_nodes_nonleaf_max = 0;
+		cap->n_nodes_leaf_max = vf->num_queue_pairs;
+	}
+
+	cap->non_leaf_nodes_identical = true;
+	cap->leaf_nodes_identical = true;
+
+	if (level_id != IAVF_TM_NODE_TYPE_QUEUE) {
+		cap->nonleaf.shaper_private_supported = true;
+		cap->nonleaf.shaper_private_dual_rate_supported = false;
+		cap->nonleaf.shaper_private_rate_min = 0;
+		/* Kbytes per second */
+		cap->nonleaf.shaper_private_rate_max =
+			vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+		cap->nonleaf.shaper_private_packet_mode_supported = 0;
+		cap->nonleaf.shaper_private_byte_mode_supported = 1;
+		cap->nonleaf.shaper_shared_n_max = 0;
+		cap->nonleaf.shaper_shared_packet_mode_supported = 0;
+		cap->nonleaf.shaper_shared_byte_mode_supported = 0;
+		if (level_id == IAVF_TM_NODE_TYPE_PORT)
+			cap->nonleaf.sched_n_children_max =
+				IAVF_MAX_TRAFFIC_CLASS;
+		else
+			cap->nonleaf.sched_n_children_max =
+				vf->num_queue_pairs;
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
+		cap->nonleaf.sched_wfq_n_groups_max = 0;
+		cap->nonleaf.sched_wfq_weight_max = 1;
+		cap->nonleaf.sched_wfq_packet_mode_supported = 0;
+		cap->nonleaf.sched_wfq_byte_mode_supported = 0;
+		cap->nonleaf.stats_mask = 0;
+
+		return 0;
+	}
+
+	/* queue node */
+	cap->leaf.shaper_private_supported = false;
+	cap->leaf.shaper_private_dual_rate_supported = false;
+	cap->leaf.shaper_private_rate_min = 0;
+	/* Kbytes per second */
+	cap->leaf.shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->leaf.shaper_private_packet_mode_supported = 0;
+	cap->leaf.shaper_private_byte_mode_supported = 1;
+	cap->leaf.shaper_shared_n_max = 0;
+	cap->leaf.shaper_shared_packet_mode_supported = 0;
+	cap->leaf.shaper_shared_byte_mode_supported = 0;
+	cap->leaf.cman_head_drop_supported = false;
+	cap->leaf.cman_wred_context_private_supported = true;
+	cap->leaf.cman_wred_context_shared_n_max = 0;
+	cap->leaf.stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_node_capabilities_get(struct rte_eth_dev *dev,
+			   uint32_t node_id,
+			   struct rte_tm_node_capabilities *cap,
+			   struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type;
+	struct virtchnl_qos_cap_elem tc_cap;
+	struct iavf_tm_node *tm_node;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "not support capability get";
+		return -EINVAL;
+	}
+
+	tc_cap = vf->qos_cap->cap[tm_node->tc];
+	if (tc_cap.tc_id != tm_node->tc) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "tc not match";
+		return -EINVAL;
+	}
+
+	cap->shaper_private_supported = true;
+	cap->shaper_private_dual_rate_supported = false;
+	cap->shaper_private_rate_min = tc_cap.shaper.committed;
+	cap->shaper_private_rate_max = tc_cap.shaper.peak;
+	cap->shaper_shared_n_max = 0;
+	cap->nonleaf.sched_n_children_max = vf->num_queue_pairs;
+	cap->nonleaf.sched_sp_n_priorities_max = 1;
+	cap->nonleaf.sched_wfq_n_children_per_group_max = 1;
+	cap->nonleaf.sched_wfq_n_groups_max = 0;
+	cap->nonleaf.sched_wfq_weight_max = tc_cap.weight;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct virtchnl_queue_tc_mapping *q_tc_mapping;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+	uint16_t size;
+	int index = 0, node_committed = 0;
+	int ret, i;
+
+	size = sizeof(*q_tc_mapping) + sizeof(q_tc_mapping->tc[0]) *
+		(vf->qos_cap->num_elem - 1);
+	q_tc_mapping = rte_zmalloc("q_tc", size, 0);
+	if (!q_tc_mapping)
+		return IAVF_ERR_NO_MEMORY;
+	q_tc_mapping->vsi_id = vf->vsi.vsi_id;
+	q_tc_mapping->num_tc = vf->qos_cap->num_elem;
+	q_tc_mapping->num_queue_pairs = vf->num_queue_pairs;
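+	/* Count the queues attached to each TC. Queues of one TC are
+	 * required to be contiguous, so only a start queue id and a
+	 * queue count are sent to the PF per TC.
+	 */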
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		q_tc_mapping->tc[tm_node->tc].req.queue_count++;
+		node_committed++;
+	}
+
+	for (i = 0; i < q_tc_mapping->num_tc; i++) {
+		q_tc_mapping->tc[i].req.start_queue_id = index;
+		index += q_tc_mapping->tc[i].req.queue_count;
+	}
+	if (node_committed < vf->num_queue_pairs) {
+		PMD_DRV_LOG(ERR, "queue nodes are less than the allocated queue pairs");
+		rte_free(q_tc_mapping);
+		return IAVF_ERR_PARAM;
+	}
+
+	ret = iavf_set_q_tc_map(dev, q_tc_mapping, size);
+	rte_free(q_tc_mapping);
+	if (ret)
+		return ret;
+
+	return IAVF_SUCCESS;
+}
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index 5d57e8b541..daa1b3755c 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -467,7 +467,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 		VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 		VIRTCHNL_VF_OFFLOAD_CRC |
 		VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-		VIRTCHNL_VF_LARGE_NUM_QPAIRS;
+		VIRTCHNL_VF_LARGE_NUM_QPAIRS |
+		VIRTCHNL_VF_OFFLOAD_TC;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -1550,6 +1551,59 @@ iavf_set_hena(struct iavf_adapter *adapter, uint64_t hena)
 	return err;
 }
 
+int
+iavf_get_qos_cap(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	uint32_t len;
+	int err;
+
+	args.ops = VIRTCHNL_OP_GET_QOS_CAPS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+	err = iavf_execute_vf_cmd(adapter, &args);
+	if (err) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command VIRTCHNL_OP_GET_QOS_CAPS");
+		return -1;
+	}
+
+	len = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+
+	rte_memcpy(vf->qos_cap, args.out_buffer,
+		   RTE_MIN(args.out_size, len));
+
+	return 0;
+}
+
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+		struct virtchnl_queue_tc_mapping *q_tc_mapping, uint16_t size)
+{
+	struct iavf_adapter *adapter =
+			IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_cmd_info args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.ops = VIRTCHNL_OP_CONFIG_TC_MAP;
+	args.in_args = (uint8_t *)q_tc_mapping;
+	args.in_args_size = size;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	err = iavf_execute_vf_cmd(adapter, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "Failed to execute command of"
+			    " VIRTCHNL_OP_CONFIG_TC_MAP");
+	return err;
+}
+
 int
 iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			struct rte_ether_addr *mc_addrs,
diff --git a/drivers/net/iavf/meson.build b/drivers/net/iavf/meson.build
index 6f222a9e87..f2010a8337 100644
--- a/drivers/net/iavf/meson.build
+++ b/drivers/net/iavf/meson.build
@@ -19,6 +19,7 @@ sources = files(
         'iavf_generic_flow.c',
         'iavf_fdir.c',
         'iavf_hash.c',
+        'iavf_tm.c',
 )
 
 if arch_subdir == 'x86'
-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v3 0/5] Enable ETS-based Tx QoS for VF in DCF
  2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                   ` (5 preceding siblings ...)
  2021-06-17 10:17 ` [dpdk-dev] [PATCH v2 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
@ 2021-06-25  9:31 ` Ting Xu
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 1/5] common/iavf: support ETS-based QoS offload configuration Ting Xu
                     ` (4 more replies)
  2021-06-30  6:53 ` [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                   ` (2 subsequent siblings)
  9 siblings, 5 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-25  9:31 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch series enables ETS-based Tx QoS for IAVF. A kernel tool is
used to configure ETS first. DCF is used to set the bandwidth limit for
the VFs of each TC. IAVF supports querying the QoS capability and
setting the queue TC mapping. The Traffic Management API is utilized to
configure the QoS hierarchy scheduler tree. The scheduler tree will be
passed to hardware to enable all the above functions.

Ting Xu (5):
  common/iavf: support ETS-based QoS offload configuration
  net/ice/base: support DCF query port ETS adminq
  net/ice: support DCF link status event handling
  net/ice: support QoS config VF bandwidth in DCF
  net/iavf: query QoS cap and set queue TC mapping

 drivers/common/iavf/iavf_type.h  |   2 +
 drivers/common/iavf/virtchnl.h   | 131 ++++++
 drivers/net/iavf/iavf.h          |  45 ++
 drivers/net/iavf/iavf_ethdev.c   |  31 ++
 drivers/net/iavf/iavf_tm.c       | 667 +++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c    |  56 ++-
 drivers/net/iavf/meson.build     |   1 +
 drivers/net/ice/base/ice_dcb.c   |   3 +-
 drivers/net/ice/ice_dcf.c        |   6 +-
 drivers/net/ice/ice_dcf.h        |  53 +++
 drivers/net/ice/ice_dcf_ethdev.c |  67 ++-
 drivers/net/ice/ice_dcf_ethdev.h |   3 +
 drivers/net/ice/ice_dcf_parent.c |  81 ++++
 drivers/net/ice/ice_dcf_sched.c  | 697 +++++++++++++++++++++++++++++++
 drivers/net/ice/meson.build      |   3 +-
 15 files changed, 1839 insertions(+), 7 deletions(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v3 1/5] common/iavf: support ETS-based QoS offload configuration
  2021-06-25  9:31 ` [dpdk-dev] [PATCH v3 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
@ 2021-06-25  9:31   ` Ting Xu
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 2/5] net/ice/base: support DCF query port ETS adminq Ting Xu
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-25  9:31 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch adds new virtchnl opcodes and structures for QoS
configuration, which include:
1. VIRTCHNL_VF_OFFLOAD_QOS, to negotiate the capability supporting QoS
configuration. If both the VF and PF have this flag, the ETS-based QoS
offload function is supported.
2. VIRTCHNL_OP_DCF_CONFIG_BW, with which DCF is supposed to configure
min and max bandwidth for each VF per enabled TC. To make the VSI node
bandwidth configuration work, DCF also needs to configure the TC node
bandwidth directly.
3. VIRTCHNL_OP_GET_QOS_CAPS, with which the VF queries the current QoS
configuration, such as enabled TCs, arbiter type, up2tc and the
bandwidth of the VSI node. The configuration is previously set by DCB
and DCF, and is now the potential QoS capability of the VF. The VF can
take it as a reference to configure the queue TC mapping.
4. VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP, to set the VF queue to TC mapping
for all Tx and Rx queues. Queues mapping to one TC should be contiguous
and all allocated queues should be mapped. A usage sketch follows this
list.
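
For illustration only (not part of this patch), a minimal sketch of how
a DCF could fill the new structures to cap TC0 of VF 1; the VF id and
the rates are made up, and error handling is omitted:

	uint16_t len = sizeof(struct virtchnl_dcf_bw_cfg_list);
	struct virtchnl_dcf_bw_cfg_list *bw_list = calloc(1, len);

	bw_list->vf_id = 1;
	bw_list->num_elem = 1;
	bw_list->node_type = VIRTCHNL_DCF_TARGET_VF_BW;
	bw_list->cfg[0].tc_num = 0;
	bw_list->cfg[0].bw_type = VIRTCHNL_DCF_BW_CIR | VIRTCHNL_DCF_BW_PIR;
	bw_list->cfg[0].type = VIRTCHNL_BW_SHAPER;
	bw_list->cfg[0].shaper.committed = 500;     /* min, in Kbps */
	bw_list->cfg[0].shaper.peak = 1000000;      /* max, 1 Gbps */
	/* bw_list is then carried in a VIRTCHNL_OP_DCF_CONFIG_BW message */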

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/common/iavf/iavf_type.h |   2 +
 drivers/common/iavf/virtchnl.h  | 131 ++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)

diff --git a/drivers/common/iavf/iavf_type.h b/drivers/common/iavf/iavf_type.h
index f3815d523b..73dfb47e70 100644
--- a/drivers/common/iavf/iavf_type.h
+++ b/drivers/common/iavf/iavf_type.h
@@ -141,6 +141,8 @@ enum iavf_debug_mask {
 #define IAVF_PHY_LED_MODE_MASK			0xFFFF
 #define IAVF_PHY_LED_MODE_ORIG			0x80000000
 
+#define IAVF_MAX_TRAFFIC_CLASS	8
+
 /* Memory types */
 enum iavf_memset_type {
 	IAVF_NONDMA_MEM = 0,
diff --git a/drivers/common/iavf/virtchnl.h b/drivers/common/iavf/virtchnl.h
index 197edce8a1..1cf0866124 100644
--- a/drivers/common/iavf/virtchnl.h
+++ b/drivers/common/iavf/virtchnl.h
@@ -85,6 +85,10 @@ enum virtchnl_rx_hsplit {
 	VIRTCHNL_RX_HSPLIT_SPLIT_SCTP    = 8,
 };
 
+enum virtchnl_bw_limit_type {
+	VIRTCHNL_BW_SHAPER = 0,
+};
+
 #define VIRTCHNL_ETH_LENGTH_OF_ADDRESS	6
 /* END GENERIC DEFINES */
 
@@ -130,6 +134,7 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
 	VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
-	/* opcodes 34, 35, 36, and 37 are reserved */
+	/* opcodes 34, 35 and 36 are reserved */
+	VIRTCHNL_OP_DCF_CONFIG_BW = 37,
 	VIRTCHNL_OP_DCF_VLAN_OFFLOAD = 38,
 	VIRTCHNL_OP_DCF_CMD_DESC = 39,
 	VIRTCHNL_OP_DCF_CMD_BUFF = 40,
@@ -152,6 +157,8 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57,
 	VIRTCHNL_OP_ENABLE_VLAN_FILTERING_V2 = 58,
 	VIRTCHNL_OP_DISABLE_VLAN_FILTERING_V2 = 59,
+	VIRTCHNL_OP_GET_QOS_CAPS = 66,
+	VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP = 67,
 	VIRTCHNL_OP_ENABLE_QUEUES_V2 = 107,
 	VIRTCHNL_OP_DISABLE_QUEUES_V2 = 108,
 	VIRTCHNL_OP_MAP_QUEUE_VECTOR = 111,
@@ -398,6 +405,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC	BIT(26)
 #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF		BIT(27)
 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF		BIT(28)
+#define VIRTCHNL_VF_OFFLOAD_QOS		BIT(29)
 #define VIRTCHNL_VF_CAP_DCF			BIT(30)
 	/* BIT(31) is reserved */
 
@@ -1285,6 +1293,14 @@ struct virtchnl_filter {
 
 VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
 
+struct virtchnl_shaper_bw {
+	/* Unit is Kbps */
+	u32 committed;
+	u32 peak;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw);
+
 /* VIRTCHNL_OP_DCF_GET_VSI_MAP
  * VF sends this message to get VSI mapping table.
  * PF responds with an indirect message containing VF's
@@ -1357,6 +1373,37 @@ struct virtchnl_dcf_vlan_offload {
 
 VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_dcf_vlan_offload);
 
+struct virtchnl_dcf_bw_cfg {
+	u8 tc_num;
+#define VIRTCHNL_DCF_BW_CIR		BIT(0)
+#define VIRTCHNL_DCF_BW_PIR		BIT(1)
+	u8 bw_type;
+	u8 pad[2];
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_dcf_bw_cfg);
+
+/* VIRTCHNL_OP_DCF_CONFIG_BW
+ * VF sends this message to set the bandwidth configuration of each
+ * TC with a specific vf id. The flag node_type indicates whether this
+ * message is to configure VSI node or TC node bandwidth.
+ */
+struct virtchnl_dcf_bw_cfg_list {
+	u16 vf_id;
+	u8 num_elem;
+#define VIRTCHNL_DCF_TARGET_TC_BW	0
+#define VIRTCHNL_DCF_TARGET_VF_BW	1
+	u8 node_type;
+	struct virtchnl_dcf_bw_cfg cfg[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_dcf_bw_cfg_list);
+
 struct virtchnl_supported_rxdids {
 	/* see enum virtchnl_rx_desc_id_bitmasks */
 	u64 supported_rxdids;
@@ -1768,6 +1815,62 @@ struct virtchnl_fdir_del {
 
 VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
 
+/* VIRTCHNL_OP_GET_QOS_CAPS
+ * VF sends this message to get its QoS Caps, such as
+ * TC number, Arbiter and Bandwidth.
+ */
+struct virtchnl_qos_cap_elem {
+	u8 tc_num;
+	u8 tc_prio;
+#define VIRTCHNL_ABITER_STRICT      0
+#define VIRTCHNL_ABITER_ETS         2
+	u8 arbiter;
+#define VIRTCHNL_STRICT_WEIGHT      1
+	u8 weight;
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem);
+
+struct virtchnl_qos_cap_list {
+	u16 vsi_id;
+	u16 num_elem;
+	struct virtchnl_qos_cap_elem cap[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_qos_cap_list);
+
+/* VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP
+ * VF sends the message virtchnl_queue_tc_mapping to set the queue to TC
+ * mapping for all the Tx and Rx queues of a specified VSI, and gets a
+ * response with a bitmap of the valid user priorities associated with
+ * the queues.
+ */
+struct virtchnl_queue_tc_mapping {
+	u16 vsi_id;
+	u16 num_tc;
+	u16 num_queue_pairs;
+	u8 pad[2];
+	union {
+		struct {
+			u16 start_queue_id;
+			u16 queue_count;
+		} req;
+		struct {
+#define VIRTCHNL_USER_PRIO_TYPE_UP	0
+#define VIRTCHNL_USER_PRIO_TYPE_DSCP	1
+			u16 prio_type;
+			u16 valid_prio_bitmap;
+		} resp;
+	} tc[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_tc_mapping);
+
 /* VIRTCHNL_OP_QUERY_FDIR_FILTER
  * VF sends this request to PF by filling out vsi_id,
  * flow_id and reset_counter. PF will return query_info
@@ -2118,6 +2221,19 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DCF_GET_VSI_MAP:
 	case VIRTCHNL_OP_DCF_GET_PKG_INFO:
 		break;
+	case VIRTCHNL_OP_DCF_CONFIG_BW:
+		valid_len = sizeof(struct virtchnl_dcf_bw_cfg_list);
+		if (msglen >= valid_len) {
+			struct virtchnl_dcf_bw_cfg_list *cfg_list =
+				(struct virtchnl_dcf_bw_cfg_list *)msg;
+			if (cfg_list->num_elem == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (cfg_list->num_elem - 1) *
+					 sizeof(struct virtchnl_dcf_bw_cfg);
+		}
+		break;
 	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		break;
 	case VIRTCHNL_OP_ADD_RSS_CFG:
@@ -2133,6 +2249,21 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_QUERY_FDIR_FILTER:
 		valid_len = sizeof(struct virtchnl_fdir_query);
 		break;
+	case VIRTCHNL_OP_GET_QOS_CAPS:
+		break;
+	case VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP:
+		valid_len = sizeof(struct virtchnl_queue_tc_mapping);
+		if (msglen >= valid_len) {
+			struct virtchnl_queue_tc_mapping *q_tc =
+				(struct virtchnl_queue_tc_mapping *)msg;
+			if (q_tc->num_tc == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (q_tc->num_tc - 1) *
+					 sizeof(q_tc->tc[0]);
+		}
+		break;
 	case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
 		break;
 	case VIRTCHNL_OP_ADD_VLAN_V2:
-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v3 2/5] net/ice/base: support DCF query port ETS adminq
  2021-06-25  9:31 ` [dpdk-dev] [PATCH v3 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 1/5] common/iavf: support ETS-based QoS offload configuration Ting Xu
@ 2021-06-25  9:31   ` Ting Xu
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 3/5] net/ice: support DCF link status event handling Ting Xu
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-25  9:31 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

In the adminq command query port ETS function, the root node teid is
needed. However, for DCF, the root node is not initialized, which
causes an error when the variable is dereferenced. In this patch, we
check whether the root node is available first.

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/base/ice_dcb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ice/base/ice_dcb.c b/drivers/net/ice/base/ice_dcb.c
index c73fc095ff..9c9675f6ef 100644
--- a/drivers/net/ice/base/ice_dcb.c
+++ b/drivers/net/ice/base/ice_dcb.c
@@ -1524,7 +1524,8 @@ ice_aq_query_port_ets(struct ice_port_info *pi,
 		return ICE_ERR_PARAM;
 	cmd = &desc.params.port_ets;
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
-	cmd->port_teid = pi->root->info.node_teid;
+	if (pi->root)
+		cmd->port_teid = pi->root->info.node_teid;
 
 	status = ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
 	return status;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v3 3/5] net/ice: support DCF link status event handling
  2021-06-25  9:31 ` [dpdk-dev] [PATCH v3 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 1/5] common/iavf: support ETS-based QoS offload configuration Ting Xu
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 2/5] net/ice/base: support DCF query port ETS adminq Ting Xu
@ 2021-06-25  9:31   ` Ting Xu
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 4/5] net/ice: support QoS config VF bandwidth in DCF Ting Xu
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 5/5] net/iavf: query QoS cap and set queue TC mapping Ting Xu
  4 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-25  9:31 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

When the link status changes, DCF will receive a virtchnl PF event
message. Add support to handle this event, change the link status and
update the link info.
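
For illustration only (not part of this patch), a minimal sketch of how
an application consumes the resulting LSC events (assuming
dev_conf.intr_conf.lsc = 1 was set at configure time):

	static int
	lsc_event_cb(uint16_t port_id, enum rte_eth_event_type type,
		     void *cb_arg __rte_unused, void *ret_param __rte_unused)
	{
		struct rte_eth_link link;

		if (type != RTE_ETH_EVENT_INTR_LSC)
			return 0;

		rte_eth_link_get_nowait(port_id, &link);
		printf("port %u link %s, speed %u Mbps\n", port_id,
		       link.link_status ? "up" : "down", link.link_speed);
		return 0;
	}

	/* in the init path */
	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
				      lsc_event_cb, NULL);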

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.h        |  6 ++++
 drivers/net/ice/ice_dcf_ethdev.c | 54 ++++++++++++++++++++++++++++++--
 drivers/net/ice/ice_dcf_parent.c | 51 ++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 0cb90b5e9f..587093b909 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -60,6 +60,10 @@ struct ice_dcf_hw {
 	uint16_t nb_msix;
 	uint16_t rxq_map[16];
 	struct virtchnl_eth_stats eth_stats_offset;
+
+	/* Link status */
+	bool link_up;
+	uint32_t link_speed;
 };
 
 int ice_dcf_execute_virtchnl_cmd(struct ice_dcf_hw *hw,
@@ -77,5 +81,7 @@ int ice_dcf_disable_queues(struct ice_dcf_hw *hw);
 int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 			struct virtchnl_eth_stats *pstats);
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
+int ice_dcf_link_update(struct rte_eth_dev *dev,
+		    __rte_unused int wait_to_complete);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index f73dc80bd9..0b40ebbec6 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -881,11 +881,59 @@ ice_dcf_dev_close(struct rte_eth_dev *dev)
 	return 0;
 }
 
-static int
-ice_dcf_link_update(__rte_unused struct rte_eth_dev *dev,
+int
+ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete)
 {
-	return 0;
+	struct ice_dcf_adapter *ad = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &ad->real_hw;
+	struct rte_eth_link new_link;
+
+	memset(&new_link, 0, sizeof(new_link));
+
+	/* Only read the status info stored in the VF; the info is updated
+	 * when a LINK_CHANGE event is received from the PF by virtchnl.
+	 */
+	switch (hw->link_speed) {
+	case 10:
+		new_link.link_speed = ETH_SPEED_NUM_10M;
+		break;
+	case 100:
+		new_link.link_speed = ETH_SPEED_NUM_100M;
+		break;
+	case 1000:
+		new_link.link_speed = ETH_SPEED_NUM_1G;
+		break;
+	case 10000:
+		new_link.link_speed = ETH_SPEED_NUM_10G;
+		break;
+	case 20000:
+		new_link.link_speed = ETH_SPEED_NUM_20G;
+		break;
+	case 25000:
+		new_link.link_speed = ETH_SPEED_NUM_25G;
+		break;
+	case 40000:
+		new_link.link_speed = ETH_SPEED_NUM_40G;
+		break;
+	case 50000:
+		new_link.link_speed = ETH_SPEED_NUM_50G;
+		break;
+	case 100000:
+		new_link.link_speed = ETH_SPEED_NUM_100G;
+		break;
+	default:
+		new_link.link_speed = ETH_SPEED_NUM_NONE;
+		break;
+	}
+
+	new_link.link_duplex = ETH_LINK_FULL_DUPLEX;
+	new_link.link_status = hw->link_up ? ETH_LINK_UP :
+					     ETH_LINK_DOWN;
+	new_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+				ETH_LINK_SPEED_FIXED);
+
+	return rte_eth_linkstatus_set(dev, &new_link);
 }
 
 /* Add UDP tunneling port */
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 19420a0f58..788f6dd2a0 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -178,6 +178,44 @@ start_vsi_reset_thread(struct ice_dcf_hw *dcf_hw, bool vfr, uint16_t vf_id)
 	}
 }
 
+static uint32_t
+ice_dcf_convert_link_speed(enum virtchnl_link_speed virt_link_speed)
+{
+	uint32_t speed;
+
+	switch (virt_link_speed) {
+	case VIRTCHNL_LINK_SPEED_100MB:
+		speed = 100;
+		break;
+	case VIRTCHNL_LINK_SPEED_1GB:
+		speed = 1000;
+		break;
+	case VIRTCHNL_LINK_SPEED_10GB:
+		speed = 10000;
+		break;
+	case VIRTCHNL_LINK_SPEED_40GB:
+		speed = 40000;
+		break;
+	case VIRTCHNL_LINK_SPEED_20GB:
+		speed = 20000;
+		break;
+	case VIRTCHNL_LINK_SPEED_25GB:
+		speed = 25000;
+		break;
+	case VIRTCHNL_LINK_SPEED_2_5GB:
+		speed = 2500;
+		break;
+	case VIRTCHNL_LINK_SPEED_5GB:
+		speed = 5000;
+		break;
+	default:
+		speed = 0;
+		break;
+	}
+
+	return speed;
+}
+
 void
 ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 			    uint8_t *msg, uint16_t msglen)
@@ -196,6 +234,19 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 		break;
 	case VIRTCHNL_EVENT_LINK_CHANGE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event");
+		dcf_hw->link_up = pf_msg->event_data.link_event.link_status;
+		if (dcf_hw->vf_res->vf_cap_flags &
+			VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+			dcf_hw->link_speed =
+				pf_msg->event_data.link_event_adv.link_speed;
+		} else {
+			enum virtchnl_link_speed speed;
+			speed = pf_msg->event_data.link_event.link_speed;
+			dcf_hw->link_speed = ice_dcf_convert_link_speed(speed);
+		}
+		ice_dcf_link_update(dcf_hw->eth_dev, 0);
+		rte_eth_dev_callback_process(dcf_hw->eth_dev,
+			RTE_ETH_EVENT_INTR_LSC, NULL);
 		break;
 	case VIRTCHNL_EVENT_PF_DRIVER_CLOSE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event");
-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v3 4/5] net/ice: support QoS config VF bandwidth in DCF
  2021-06-25  9:31 ` [dpdk-dev] [PATCH v3 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (2 preceding siblings ...)
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 3/5] net/ice: support DCF link status event handling Ting Xu
@ 2021-06-25  9:31   ` Ting Xu
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 5/5] net/iavf: query QoS cap and set queue TC mapping Ting Xu
  4 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-25  9:31 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch supports the ETS-based QoS configuration. It enables the DCF
to configure bandwidth limits for each VF VSI of different TCs. A
hierarchical scheduler tree is built with port, TC and VSI nodes, as in
the sketch below.
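
For illustration only (not part of this patch), a minimal sketch of how
a DCF application might build this tree through the rte_tm API; the
port id, node ids, target VF and rate are made up, and error handling
is omitted:

	struct rte_tm_error err = { 0 };
	struct rte_tm_shaper_params sp = { 0 };
	struct rte_tm_node_params np = { 0 };
	uint16_t dcf_port = 0;                 /* hypothetical DCF port */
	uint32_t profile = 1, root = 300, tc0 = 400, vf0_vsi = 0;

	/* limit VF 0 on TC0 to ~1 Gbps; rte_tm rates are bytes/sec */
	sp.peak.rate = 1000000000ULL / 8;
	rte_tm_shaper_profile_add(dcf_port, profile, &sp, &err);

	np.nonleaf.n_sp_priorities = 1;
	rte_tm_node_add(dcf_port, root, RTE_TM_NODE_ID_NULL,
			0, 1, 0 /* port level */, &np, &err);
	rte_tm_node_add(dcf_port, tc0, root,
			0, 1, 1 /* TC level */, &np, &err);

	/* the VSI node carries the shaper; its id selects the VF */
	np.shaper_profile_id = profile;
	rte_tm_node_add(dcf_port, vf0_vsi, tc0,
			0, 1, 2 /* VSI level */, &np, &err);

	/* translated into VIRTCHNL_OP_DCF_CONFIG_BW messages to the PF */
	rte_tm_hierarchy_commit(dcf_port, 1, &err);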

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.c        |   6 +-
 drivers/net/ice/ice_dcf.h        |  47 +++
 drivers/net/ice/ice_dcf_ethdev.c |  13 +
 drivers/net/ice/ice_dcf_ethdev.h |   3 +
 drivers/net/ice/ice_dcf_parent.c |  30 ++
 drivers/net/ice/ice_dcf_sched.c  | 697 +++++++++++++++++++++++++++++++
 drivers/net/ice/meson.build      |   3 +-
 7 files changed, 797 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index d72a6f357e..4ff2216a5c 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -235,7 +235,8 @@ ice_dcf_get_vf_resource(struct ice_dcf_hw *hw)
 	caps = VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | VIRTCHNL_VF_OFFLOAD_RX_POLLING |
 	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED | VIRTCHNL_VF_CAP_DCF |
 	       VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
+	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC |
+	       VIRTCHNL_VF_OFFLOAD_QOS;
 
 	err = ice_dcf_send_cmd_req_no_irq(hw, VIRTCHNL_OP_GET_VF_RESOURCES,
 					  (uint8_t *)&caps, sizeof(caps));
@@ -668,6 +669,9 @@ ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 		}
 	}
 
+	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+		ice_dcf_tm_conf_init(eth_dev);
+
 	hw->eth_dev = eth_dev;
 	rte_intr_callback_register(&pci_dev->intr_handle,
 				   ice_dcf_dev_interrupt_handler, hw);
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 587093b909..e74e5d7e81 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -6,6 +6,7 @@
 #define _ICE_DCF_H_
 
 #include <ethdev_driver.h>
+#include <rte_tm_driver.h>
 
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
@@ -30,6 +31,49 @@ struct dcf_virtchnl_cmd {
 	volatile int pending;
 };
 
+struct ice_dcf_tm_shaper_profile {
+	TAILQ_ENTRY(ice_dcf_tm_shaper_profile) node;
+	uint32_t shaper_profile_id;
+	uint32_t reference_count;
+	struct rte_tm_shaper_params profile;
+};
+
+TAILQ_HEAD(ice_dcf_shaper_profile_list, ice_dcf_tm_shaper_profile);
+
+/* Struct to store Traffic Manager node configuration. */
+struct ice_dcf_tm_node {
+	TAILQ_ENTRY(ice_dcf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct ice_dcf_tm_node *parent;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(ice_dcf_tm_node_list, ice_dcf_tm_node);
+
+/* node type of Traffic Manager */
+enum ice_dcf_tm_node_type {
+	ICE_DCF_TM_NODE_TYPE_PORT,
+	ICE_DCF_TM_NODE_TYPE_TC,
+	ICE_DCF_TM_NODE_TYPE_VSI,
+	ICE_DCF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct ice_dcf_tm_conf {
+	struct ice_dcf_shaper_profile_list shaper_profile_list;
+	struct ice_dcf_tm_node *root; /* root node - port */
+	struct ice_dcf_tm_node_list tc_list; /* node list for all the TCs */
+	struct ice_dcf_tm_node_list vsi_list; /* node list for all the VSIs */
+	uint32_t nb_tc_node;
+	uint32_t nb_vsi_node;
+	bool committed;
+};
+
 struct ice_dcf_hw {
 	struct iavf_hw avf;
 
@@ -45,6 +89,8 @@ struct ice_dcf_hw {
 	uint16_t *vf_vsi_map;
 	uint16_t pf_vsi_id;
 
+	struct ice_dcf_tm_conf tm_conf;
+	struct ice_aqc_port_ets_elem *ets_config;
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
@@ -83,5 +129,6 @@ int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
 int ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete);
+void ice_dcf_tm_conf_init(struct rte_eth_dev *dev);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 0b40ebbec6..cab7c4da87 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -994,6 +994,18 @@ ice_dcf_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
 	return ret;
 }
 
+static int
+ice_dcf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+		void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &ice_dcf_tm_ops;
+
+	return 0;
+}
+
 static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.dev_start               = ice_dcf_dev_start,
 	.dev_stop                = ice_dcf_dev_stop,
@@ -1018,6 +1030,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.flow_ops_get            = ice_dcf_dev_flow_ops_get,
 	.udp_tunnel_port_add	 = ice_dcf_dev_udp_tunnel_port_add,
 	.udp_tunnel_port_del	 = ice_dcf_dev_udp_tunnel_port_del,
+	.tm_ops_get              = ice_dcf_tm_ops_get,
 };
 
 static int
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index e7c9d7fe41..8510e37119 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -7,6 +7,8 @@
 
 #include "base/ice_common.h"
 #include "base/ice_adminq_cmd.h"
+#include "base/ice_dcb.h"
+#include "base/ice_sched.h"
 
 #include "ice_ethdev.h"
 #include "ice_dcf.h"
@@ -52,6 +54,7 @@ struct ice_dcf_vf_repr {
 	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN */
 };
 
+extern const struct rte_tm_ops ice_dcf_tm_ops;
 void ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 				 uint8_t *msg, uint16_t msglen);
 int ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 788f6dd2a0..0ea32cf8e9 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -264,6 +264,29 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 	}
 }
 
+static int
+ice_dcf_query_port_ets(struct ice_hw *parent_hw, struct ice_dcf_hw *real_hw)
+{
+	int ret;
+
+	real_hw->ets_config = (struct ice_aqc_port_ets_elem *)
+			ice_malloc(real_hw, sizeof(*real_hw->ets_config));
+	if (!real_hw->ets_config)
+		return ICE_ERR_NO_MEMORY;
+
+	ret = ice_aq_query_port_ets(parent_hw->port_info,
+			real_hw->ets_config, sizeof(*real_hw->ets_config),
+			NULL);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "DCF Query Port ETS failed");
+		rte_free(real_hw->ets_config);
+		real_hw->ets_config = NULL;
+		return ret;
+	}
+
+	return ICE_SUCCESS;
+}
+
 static int
 ice_dcf_init_parent_hw(struct ice_hw *hw)
 {
@@ -486,6 +509,13 @@ ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev)
 		return err;
 	}
 
+	err = ice_dcf_query_port_ets(parent_hw, hw);
+	if (err) {
+		PMD_INIT_LOG(ERR, "failed to query port ets with error %d",
+			     err);
+		goto uninit_hw;
+	}
+
 	err = ice_dcf_load_pkg(parent_hw);
 	if (err) {
 		PMD_INIT_LOG(ERR, "failed to load package with error %d",
diff --git a/drivers/net/ice/ice_dcf_sched.c b/drivers/net/ice/ice_dcf_sched.c
new file mode 100644
index 0000000000..1f75675ba1
--- /dev/null
+++ b/drivers/net/ice/ice_dcf_sched.c
@@ -0,0 +1,697 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "base/ice_sched.h"
+#include "ice_dcf_ethdev.h"
+
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+				   uint32_t shaper_profile_id,
+				   struct rte_tm_error *error);
+
+const struct rte_tm_ops ice_dcf_tm_ops = {
+	.shaper_profile_add = ice_dcf_shaper_profile_add,
+	.shaper_profile_delete = ice_dcf_shaper_profile_del,
+	.hierarchy_commit = ice_dcf_hierarchy_commit,
+	.node_add = ice_dcf_node_add,
+	.node_delete = ice_dcf_node_delete,
+};
+
+void
+ice_dcf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+
+	/* initialize shaper profile list */
+	TAILQ_INIT(&hw->tm_conf.shaper_profile_list);
+
+	/* initialize node configuration */
+	hw->tm_conf.root = NULL;
+	TAILQ_INIT(&hw->tm_conf.tc_list);
+	TAILQ_INIT(&hw->tm_conf.vsi_list);
+	hw->tm_conf.nb_tc_node = 0;
+	hw->tm_conf.nb_vsi_node = 0;
+	hw->tm_conf.committed = false;
+}
+
+static inline struct ice_dcf_tm_node *
+dcf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum ice_dcf_tm_node_type *node_type)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct ice_dcf_tm_node_list *tc_list = &hw->tm_conf.tc_list;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (hw->tm_conf.root && hw->tm_conf.root->id == node_id) {
+		*node_type = ICE_DCF_TM_NODE_TYPE_PORT;
+		return hw->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, vsi_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_VSI;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static inline struct ice_dcf_tm_shaper_profile *
+dcf_shaper_profile_search(struct rte_eth_dev *dev,
+			   uint32_t shaper_profile_id)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_shaper_profile_list *shaper_profile_list =
+		&hw->tm_conf.shaper_profile_list;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	TAILQ_FOREACH(shaper_profile, shaper_profile_list, node) {
+		if (shaper_profile_id == shaper_profile->shaper_profile_id)
+			return shaper_profile;
+	}
+
+	return NULL;
+}
+
+static int
+dcf_node_param_check(struct ice_dcf_hw *hw, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* shared shaper is not supported */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= 8 * hw->num_vfs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type parent_node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_tm_shaper_profile *shaper_profile = NULL;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *parent_node;
+	struct ice_dcf_tm_node *tm_node;
+	uint16_t tc_nb = 1;
+	int i, ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = dcf_node_param_check(hw, node_id, priority, weight,
+				   params, error);
+	if (ret)
+		return ret;
+
+	for (i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		if (hw->ets_config->tc_valid_bits & (1 << i))
+			tc_nb++;
+	}
+
+	/* check if the node already exists */
+	if (dcf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* check the shaper profile id */
+	if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE) {
+		shaper_profile = dcf_shaper_profile_search(dev,
+			params->shaper_profile_id);
+		if (!shaper_profile) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+			error->message = "shaper profile not exist";
+			return -EINVAL;
+		}
+	}
+
+	/* add a root node if it does not have a parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != ICE_DCF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (hw->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("ice_dcf_tm_node",
+				      sizeof(struct ice_dcf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		hw->tm_conf.root = tm_node;
+
+		return 0;
+	}
+
+	/* TC or VSI node */
+	/* check the parent node */
+	parent_node = dcf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != ICE_DCF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != ICE_DCF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not port or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the TC node number */
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (hw->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the VSI node number */
+		if (parent_node->reference_count >= hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many VSIs for one TC";
+			return -EINVAL;
+		}
+		/* check the VSI node id */
+		if (node_id > tc_nb * hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large VSI id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or VSI node */
+	tm_node = rte_zmalloc("ice_dcf_tm_node",
+			      sizeof(struct ice_dcf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->priority = priority;
+	tm_node->weight = weight;
+	tm_node->shaper_profile = shaper_profile;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = hw->tm_conf.nb_tc_node;
+		hw->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.vsi_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		hw->tm_conf.nb_vsi_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	/* increase the reference counter of the shaper profile */
+	if (shaper_profile)
+		shaper_profile->reference_count++;
+
+	return 0;
+}
+
+static int
+ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = dcf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		if (tm_node->shaper_profile)
+			tm_node->shaper_profile->reference_count--;
+		rte_free(tm_node);
+		hw->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or VSI node */
+	if (tm_node->shaper_profile)
+		tm_node->shaper_profile->reference_count--;
+	tm_node->parent->reference_count--;
+	if (node_type == ICE_DCF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&hw->tm_conf.tc_list, tm_node, node);
+		hw->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&hw->tm_conf.vsi_list, tm_node, node);
+		hw->tm_conf.nb_vsi_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+dcf_shaper_profile_param_check(struct rte_tm_shaper_params *profile,
+				struct rte_tm_error *error)
+{
+	/* min bucket size not supported */
+	if (profile->committed.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE;
+		error->message = "committed bucket size not supported";
+		return -EINVAL;
+	}
+	/* max bucket size not supported */
+	if (profile->peak.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE;
+		error->message = "peak bucket size not supported";
+		return -EINVAL;
+	}
+	/* length adjustment not supported */
+	if (profile->pkt_length_adjust) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN;
+		error->message = "packet length adjustment not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	int ret;
+
+	if (!profile || !error)
+		return -EINVAL;
+
+	ret = dcf_shaper_profile_param_check(profile, error);
+	if (ret)
+		return ret;
+
+	shaper_profile = dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID exist";
+		return -EINVAL;
+	}
+
+	shaper_profile = rte_zmalloc("ice_dcf_tm_shaper_profile",
+				     sizeof(struct ice_dcf_tm_shaper_profile),
+				     0);
+	if (!shaper_profile)
+		return -ENOMEM;
+	shaper_profile->shaper_profile_id = shaper_profile_id;
+	rte_memcpy(&shaper_profile->profile, profile,
+			 sizeof(struct rte_tm_shaper_params));
+	TAILQ_INSERT_TAIL(&hw->tm_conf.shaper_profile_list,
+			  shaper_profile, node);
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	if (!error)
+		return -EINVAL;
+
+	shaper_profile = dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (!shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID not exist";
+		return -EINVAL;
+	}
+
+	/* don't delete a profile if it's used by one or several nodes */
+	if (shaper_profile->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE;
+		error->message = "profile in use";
+		return -EINVAL;
+	}
+
+	TAILQ_REMOVE(&hw->tm_conf.shaper_profile_list, shaper_profile, node);
+	rte_free(shaper_profile);
+
+	return 0;
+}
+
+static int
+ice_dcf_set_vf_bw(struct ice_dcf_hw *hw,
+			struct virtchnl_dcf_bw_cfg_list *vf_bw,
+			uint16_t len)
+{
+	struct dcf_virtchnl_cmd args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.v_op = VIRTCHNL_OP_DCF_CONFIG_BW;
+	args.req_msg = (uint8_t *)vf_bw;
+	args.req_msglen  = len;
+	err = ice_dcf_execute_virtchnl_cmd(hw, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "fail to execute command %s",
+			    "VIRTCHNL_OP_DCF_CONFIG_BW");
+	return err;
+}
+
+static int
+ice_dcf_validate_tc_bw(struct virtchnl_dcf_bw_cfg_list *tc_bw,
+			uint32_t port_bw)
+{
+	struct virtchnl_dcf_bw_cfg *cfg;
+	bool lowest_cir_mark = false;
+	u32 total_peak, rest_peak;
+	u32 committed, peak;
+	int i;
+
+	total_peak = 0;
+	for (i = 0; i < tc_bw->num_elem; i++)
+		total_peak += tc_bw->cfg[i].shaper.peak;
+
+	for (i = 0; i < tc_bw->num_elem; i++) {
+		cfg = &tc_bw->cfg[i];
+		peak = cfg->shaper.peak;
+		committed = cfg->shaper.committed;
+		rest_peak = total_peak - peak;
+
+		if (lowest_cir_mark && peak == 0) {
+			PMD_DRV_LOG(ERR, "Max bandwidth must be configured for TC%u",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (!lowest_cir_mark && committed)
+			lowest_cir_mark = true;
+
+		if (committed && committed + rest_peak > port_bw) {
+			PMD_DRV_LOG(ERR, "Total value of TC%u min bandwidth and other TCs' max bandwidth %uKbps should be less than port link speed %uKbps",
+				cfg->tc_num, committed + rest_peak, port_bw);
+			return -EINVAL;
+		}
+
+		if (committed && committed < ICE_SCHED_MIN_BW) {
+			PMD_DRV_LOG(ERR, "If TC%u min Tx bandwidth is set, it cannot be less than 500Kbps",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (peak && committed > peak) {
+			PMD_DRV_LOG(ERR, "TC%u min Tx bandwidth cannot be greater than max Tx bandwidth",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (peak > port_bw) {
+			PMD_DRV_LOG(ERR, "TC%u max Tx bandwidth %uKbps is greater than current link speed %uKbps",
+				cfg->tc_num, peak, port_bw);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct virtchnl_dcf_bw_cfg_list *vf_bw;
+	struct virtchnl_dcf_bw_cfg_list *tc_bw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct rte_tm_shaper_params *profile;
+	struct ice_dcf_tm_node *tm_node;
+	uint32_t port_bw, cir_total;
+	uint16_t size, vf_id;
+	int num_elem = 0;
+	int ret, i;
+
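+	/* one struct virtchnl_dcf_bw_cfg is embedded in the list, so
+	 * only nb_tc_node - 1 extra elements need to be allocated
+	 */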
+	size = sizeof(struct virtchnl_dcf_bw_cfg_list) +
+		sizeof(struct virtchnl_dcf_bw_cfg) *
+		(hw->tm_conf.nb_tc_node - 1);
+	vf_bw = rte_zmalloc("vf_bw", size, 0);
+	if (!vf_bw)
+		return ICE_ERR_NO_MEMORY;
+	tc_bw = rte_zmalloc("tc_bw", size, 0);
+	if (!tc_bw) {
+		rte_free(vf_bw);
+		return ICE_ERR_NO_MEMORY;
+	}
+
+	/* port bandwidth (Kbps) */
+	port_bw = hw->link_speed * 1000;
+	cir_total = 0;
+
+	/* init tc bw configuration */
+#define ICE_DCF_SCHED_TC_NODE 0xffff
+	tc_bw->vf_id = ICE_DCF_SCHED_TC_NODE;
+	tc_bw->node_type = VIRTCHNL_DCF_TARGET_TC_BW;
+	tc_bw->num_elem = hw->tm_conf.nb_tc_node;
+	for (i = 0; i < tc_bw->num_elem; i++) {
+		tc_bw->cfg[i].tc_num = i;
+		tc_bw->cfg[i].type = VIRTCHNL_BW_SHAPER;
+		tc_bw->cfg[i].bw_type |=
+			VIRTCHNL_DCF_BW_PIR | VIRTCHNL_DCF_BW_CIR;
+	}
+
+	for (vf_id = 0; vf_id < hw->num_vfs; vf_id++) {
+		num_elem = 0;
+		vf_bw->vf_id = vf_id;
+		vf_bw->node_type = VIRTCHNL_DCF_TARGET_VF_BW;
+		TAILQ_FOREACH(tm_node, vsi_list, node) {
+			/* scan the VSI nodes that belong to this VF
+			 * (node id = tc * num_vfs + vf_id)
+			 */
+			if (tm_node->id - hw->num_vfs * tm_node->tc != vf_id)
+				continue;
+			vf_bw->cfg[num_elem].tc_num = tm_node->tc;
+			vf_bw->cfg[num_elem].type = VIRTCHNL_BW_SHAPER;
+			if (tm_node->shaper_profile) {
+				/* convert from bytes per second to Kbps */
+				profile = &tm_node->shaper_profile->profile;
+				vf_bw->cfg[num_elem].shaper.peak =
+				profile->peak.rate / 1000 * BITS_PER_BYTE;
+				vf_bw->cfg[num_elem].shaper.committed =
+				profile->committed.rate / 1000 * BITS_PER_BYTE;
+				vf_bw->cfg[num_elem].bw_type |=
+					VIRTCHNL_DCF_BW_PIR |
+					VIRTCHNL_DCF_BW_CIR;
+			}
+
+			/* update tc node bw configuration */
+			tc_bw->cfg[tm_node->tc].shaper.peak +=
+				vf_bw->cfg[num_elem].shaper.peak;
+			tc_bw->cfg[tm_node->tc].shaper.committed +=
+				vf_bw->cfg[num_elem].shaper.committed;
+
+			cir_total += vf_bw->cfg[num_elem].shaper.committed;
+			num_elem++;
+		}
+
+		/* check if the total CIR is larger than the port bandwidth */
+		if (cir_total > port_bw) {
+			PMD_DRV_LOG(ERR, "Total CIR of all VFs is larger than port bandwidth");
+			ret = ICE_ERR_PARAM;
+			goto exit;
+		}
+		vf_bw->num_elem = num_elem;
+		ret = ice_dcf_set_vf_bw(hw, vf_bw, size);
+		if (ret)
+			goto exit;
+		memset(vf_bw, 0, size);
+	}
+
+	/* validate and commit the TC node bandwidth configuration */
+	ret = ice_dcf_validate_tc_bw(tc_bw, port_bw);
+	if (ret)
+		goto exit;
+	ret = ice_dcf_set_vf_bw(hw, tc_bw, size);
+	if (ret)
+		goto exit;
+
+	hw->tm_conf.committed = true;
+	ret = ICE_SUCCESS;
+
+exit:
+	/* free the virtchnl buffers on every path out of the function */
+	rte_free(vf_bw);
+	rte_free(tc_bw);
+	return ret;
+}
diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build
index 65750d3501..0b86d74a49 100644
--- a/drivers/net/ice/meson.build
+++ b/drivers/net/ice/meson.build
@@ -70,6 +70,7 @@ endif
 sources += files('ice_dcf.c',
          'ice_dcf_vf_representor.c',
          'ice_dcf_ethdev.c',
-         'ice_dcf_parent.c')
+         'ice_dcf_parent.c',
+         'ice_dcf_sched.c')
 
 headers = files('rte_pmd_ice.h')
-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v3 5/5] net/iavf: query QoS cap and set queue TC mapping
  2021-06-25  9:31 ` [dpdk-dev] [PATCH v3 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (3 preceding siblings ...)
  2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 4/5] net/ice: support QoS config VF bandwidth in DCF Ting Xu
@ 2021-06-25  9:31   ` Ting Xu
  4 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-25  9:31 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang

This patch adds support for the VF to configure the ETS-based Tx QoS,
including querying the current QoS configuration from the PF and
configuring the queue TC mapping. The PF QoS is configured in advance,
and the queried info is provided to the user for future usage. VF
queues are mapped to different TCs in the PF through virtchnl. A
minimal usage sketch follows.
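
A minimal sketch (not part of this patch) of how an application might
drive this path through the generic rte_tm API on an iavf port. The
port id, the non-leaf node ids and the even queue split are arbitrary
assumptions for illustration; leaf node ids must equal the queue ids,
and non-leaf node ids must be at least the number of queue pairs.

#include <string.h>
#include <rte_tm.h>

#define VF_PORT_ID 0     /* assumed ethdev port id of the VF */
#define ROOT_ID    1000  /* assumes the VF has at most 1000 queue pairs */
#define TC0_ID     1001
#define TC1_ID     1002

static int
vf_map_queues_to_two_tcs(uint16_t nb_q)
{
	struct rte_tm_node_params np;
	struct rte_tm_error err;
	uint32_t q;

	/* non-leaf nodes: one SP priority and no shaper profile, as
	 * required by the checks in iavf_node_param_check()
	 */
	memset(&np, 0, sizeof(np));
	np.nonleaf.n_sp_priorities = 1;
	if (rte_tm_node_add(VF_PORT_ID, ROOT_ID, RTE_TM_NODE_ID_NULL,
			    0, 1, 0, &np, &err) ||	/* port, level 0 */
	    rte_tm_node_add(VF_PORT_ID, TC0_ID, ROOT_ID,
			    0, 1, 1, &np, &err) ||	/* TC 0, level 1 */
	    rte_tm_node_add(VF_PORT_ID, TC1_ID, ROOT_ID,
			    0, 1, 1, &np, &err))	/* TC 1, level 1 */
		return -1;

	/* leaf (queue) nodes: WRED must be explicitly disabled */
	memset(&np, 0, sizeof(np));
	np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;

	/* map the lower half of the queues to TC0 and the upper half
	 * to TC1; all queues must be mapped, contiguously per TC
	 */
	for (q = 0; q < nb_q; q++) {
		uint32_t tc = q < nb_q / 2 ? TC0_ID : TC1_ID;

		if (rte_tm_node_add(VF_PORT_ID, q, tc, 0, 1, 2, &np, &err))
			return -1;
	}

	/* the commit is what sends VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP */
	return rte_tm_hierarchy_commit(VF_PORT_ID, 1, &err);
}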

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/iavf/iavf.h        |  45 +++
 drivers/net/iavf/iavf_ethdev.c |  31 ++
 drivers/net/iavf/iavf_tm.c     | 667 +++++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c  |  56 ++-
 drivers/net/iavf/meson.build   |   1 +
 5 files changed, 799 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 4f5811ae87..77ddf15f42 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -6,6 +6,8 @@
 #define _IAVF_ETHDEV_H_
 
 #include <rte_kvargs.h>
+#include <rte_tm_driver.h>
+
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
 #include <iavf_type.h>
@@ -82,6 +84,8 @@
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
 
+#define IAVF_BITS_PER_BYTE 8
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -129,6 +133,38 @@ enum iavf_aq_result {
 	IAVF_MSG_CMD,      /* Read async command result */
 };
 
+/* Struct to store Traffic Manager node configuration. */
+struct iavf_tm_node {
+	TAILQ_ENTRY(iavf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct iavf_tm_node *parent;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(iavf_tm_node_list, iavf_tm_node);
+
+/* node type of Traffic Manager */
+enum iavf_tm_node_type {
+	IAVF_TM_NODE_TYPE_PORT,
+	IAVF_TM_NODE_TYPE_TC,
+	IAVF_TM_NODE_TYPE_QUEUE,
+	IAVF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct iavf_tm_conf {
+	struct iavf_tm_node *root; /* root node - vf vsi */
+	struct iavf_tm_node_list tc_list; /* node list for all the TCs */
+	struct iavf_tm_node_list queue_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_queue_node;
+	bool committed;
+};
+
 /* Structure to store private data specific for VF instance. */
 struct iavf_info {
 	uint16_t num_queue_pairs;
@@ -175,6 +211,9 @@ struct iavf_info {
 	struct iavf_fdir_info fdir; /* flow director info */
 	/* indicate large VF support enabled or not */
 	bool lv_enabled;
+
+	struct virtchnl_qos_cap_list *qos_cap;
+	struct iavf_tm_conf tm_conf;
 };
 
 #define IAVF_MAX_PKT_TYPE 1024
@@ -344,4 +383,10 @@ int iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			uint32_t mc_addrs_num, bool add);
 int iavf_request_queues(struct iavf_adapter *adapter, uint16_t num);
 int iavf_get_max_rss_queue_region(struct iavf_adapter *adapter);
+int iavf_get_qos_cap(struct iavf_adapter *adapter);
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+			struct virtchnl_queue_tc_mapping *q_tc_mapping,
+			uint16_t size);
+void iavf_tm_conf_init(struct rte_eth_dev *dev);
+extern const struct rte_tm_ops iavf_tm_ops;
 #endif /* _IAVF_ETHDEV_H_ */
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 5290588b17..c8ee1a834b 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -122,6 +122,7 @@ static int iavf_dev_flow_ops_get(struct rte_eth_dev *dev,
 static int iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
 			uint32_t mc_addrs_num);
+static int iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg);
 
 static const struct rte_pci_id pci_id_iavf_map[] = {
 	{ RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_ADAPTIVE_VF) },
@@ -200,8 +201,21 @@ static const struct eth_dev_ops iavf_eth_dev_ops = {
 	.flow_ops_get               = iavf_dev_flow_ops_get,
 	.tx_done_cleanup	    = iavf_dev_tx_done_cleanup,
 	.get_monitor_addr           = iavf_get_monitor_addr,
+	.tm_ops_get                 = iavf_tm_ops_get,
 };
 
+static int
+iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+			void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &iavf_tm_ops;
+
+	return 0;
+}
+
 static int
 iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
@@ -806,6 +820,11 @@ iavf_dev_start(struct rte_eth_dev *dev)
 				      dev->data->nb_tx_queues);
 	num_queue_pairs = vf->num_queue_pairs;
 
+	if (iavf_get_qos_cap(adapter)) {
+		PMD_INIT_LOG(ERR, "Failed to get qos capability");
+		return -1;
+	}
+
 	if (iavf_init_queues(dev) != 0) {
 		PMD_DRV_LOG(ERR, "failed to do Queue init");
 		return -1;
@@ -2090,6 +2109,15 @@ iavf_init_vf(struct rte_eth_dev *dev)
 		PMD_INIT_LOG(ERR, "unable to allocate vf_res memory");
 		goto err_api;
 	}
+
+	bufsz = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+	vf->qos_cap = rte_zmalloc("qos_cap", bufsz, 0);
+	if (!vf->qos_cap) {
+		PMD_INIT_LOG(ERR, "unable to allocate qos_cap memory");
+		goto err_api;
+	}
+
 	if (iavf_get_vf_resource(adapter) != 0) {
 		PMD_INIT_LOG(ERR, "iavf_get_vf_config failed");
 		goto err_alloc;
@@ -2131,6 +2159,7 @@ iavf_init_vf(struct rte_eth_dev *dev)
 	rte_free(vf->rss_key);
 	rte_free(vf->rss_lut);
 err_alloc:
+	rte_free(vf->qos_cap);
 	rte_free(vf->vf_res);
 	vf->vsi_res = NULL;
 err_api:
@@ -2299,6 +2328,8 @@ iavf_dev_init(struct rte_eth_dev *eth_dev)
 
 	iavf_default_rss_disable(adapter);
 
+	iavf_tm_conf_init(eth_dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
new file mode 100644
index 0000000000..0211fcc0a3
--- /dev/null
+++ b/drivers/net/iavf/iavf_tm.c
@@ -0,0 +1,667 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "iavf.h"
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error);
+static int iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error);
+static int iavf_node_capabilities_get(struct rte_eth_dev *dev,
+				      uint32_t node_id,
+				      struct rte_tm_node_capabilities *cap,
+				      struct rte_tm_error *error);
+static int iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error);
+
+const struct rte_tm_ops iavf_tm_ops = {
+	.node_add = iavf_tm_node_add,
+	.node_delete = iavf_tm_node_delete,
+	.capabilities_get = iavf_tm_capabilities_get,
+	.level_capabilities_get = iavf_level_capabilities_get,
+	.node_capabilities_get = iavf_node_capabilities_get,
+	.node_type_get = iavf_node_type_get,
+	.hierarchy_commit = iavf_hierarchy_commit,
+};
+
+void
+iavf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	/* initialize node configuration */
+	vf->tm_conf.root = NULL;
+	TAILQ_INIT(&vf->tm_conf.tc_list);
+	TAILQ_INIT(&vf->tm_conf.queue_list);
+	vf->tm_conf.nb_tc_node = 0;
+	vf->tm_conf.nb_queue_node = 0;
+	vf->tm_conf.committed = false;
+}
+
+static inline struct iavf_tm_node *
+iavf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum iavf_tm_node_type *node_type)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_node_list *tc_list = &vf->tm_conf.tc_list;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+
+	if (vf->tm_conf.root && vf->tm_conf.root->id == node_id) {
+		*node_type = IAVF_TM_NODE_TYPE_PORT;
+		return vf->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_QUEUE;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static int
+iavf_node_param_check(struct iavf_info *vf, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* shaper profile is not supported */
+	if (params->shaper_profile_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+		error->message = "shaper profile not supported";
+		return -EINVAL;
+	}
+
+	/* shared shaper is not supported */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= vf->num_queue_pairs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error)
+{
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!is_leaf || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type == IAVF_TM_NODE_TYPE_QUEUE)
+		*is_leaf = true;
+	else
+		*is_leaf = false;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	enum iavf_tm_node_type parent_node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+	struct iavf_tm_node *parent_node;
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+	int ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = iavf_node_param_check(vf, node_id, priority, weight,
+				    params, error);
+	if (ret)
+		return ret;
+
+	/* check if the node already exists */
+	if (iavf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* add a root node if it does not have a parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != IAVF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (vf->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("iavf_tm_node",
+				      sizeof(struct iavf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		vf->tm_conf.root = tm_node;
+		return 0;
+	}
+
+	/* TC or queue node */
+	/* check the parent node */
+	parent_node = iavf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != IAVF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not root or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the node number */
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (vf->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the queue number */
+		if (parent_node->reference_count >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many queues";
+			return -EINVAL;
+		}
+		if (node_id >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large queue id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or queue node */
+	tm_node = rte_zmalloc("iavf_tm_node",
+			      sizeof(struct iavf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = vf->tm_conf.nb_tc_node;
+		vf->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.queue_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		vf->tm_conf.nb_queue_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == IAVF_TM_NODE_TYPE_PORT) {
+		rte_free(tm_node);
+		vf->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or queue node */
+	tm_node->parent->reference_count--;
+	if (node_type == IAVF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&vf->tm_conf.tc_list, tm_node, node);
+		vf->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&vf->tm_conf.queue_list, tm_node, node);
+		vf->tm_conf.nb_queue_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (tc_nb > vf->vf_res->num_queue_pairs)
+		return -EINVAL;
+
+	error->type = RTE_TM_ERROR_TYPE_NONE;
+
+	/* set all the parameters to 0 first. */
+	memset(cap, 0, sizeof(struct rte_tm_capabilities));
+
+	/* the hierarchy supports port + TCs + queues;
+	 * this shows the max capability, not the current configuration
+	 */
+	cap->n_nodes_max = 1 + IAVF_MAX_TRAFFIC_CLASS
+		+ vf->num_queue_pairs;
+	cap->n_levels_max = 3; /* port, TC, queue */
+	cap->non_leaf_nodes_identical = 1;
+	cap->leaf_nodes_identical = 1;
+	cap->shaper_n_max = cap->n_nodes_max;
+	cap->shaper_private_n_max = cap->n_nodes_max;
+	cap->shaper_private_dual_rate_n_max = 0;
+	cap->shaper_private_rate_min = 0;
+	/* GBps */
+	cap->shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->shaper_private_packet_mode_supported = 0;
+	cap->shaper_private_byte_mode_supported = 1;
+	cap->shaper_shared_n_max = 0;
+	cap->shaper_shared_n_nodes_per_shaper_max = 0;
+	cap->shaper_shared_n_shapers_per_node_max = 0;
+	cap->shaper_shared_dual_rate_n_max = 0;
+	cap->shaper_shared_rate_min = 0;
+	cap->shaper_shared_rate_max = 0;
+	cap->shaper_shared_packet_mode_supported = 0;
+	cap->shaper_shared_byte_mode_supported = 0;
+	cap->sched_n_children_max = vf->num_queue_pairs;
+	cap->sched_sp_n_priorities_max = 1;
+	cap->sched_wfq_n_children_per_group_max = 0;
+	cap->sched_wfq_n_groups_max = 0;
+	cap->sched_wfq_weight_max = 1;
+	cap->sched_wfq_packet_mode_supported = 0;
+	cap->sched_wfq_byte_mode_supported = 0;
+	cap->cman_head_drop_supported = 0;
+	cap->dynamic_update_mask = 0;
+	cap->shaper_pkt_length_adjust_min = RTE_TM_ETH_FRAMING_OVERHEAD;
+	cap->shaper_pkt_length_adjust_max = RTE_TM_ETH_FRAMING_OVERHEAD_FCS;
+	cap->cman_wred_context_n_max = 0;
+	cap->cman_wred_context_private_n_max = 0;
+	cap->cman_wred_context_shared_n_max = 0;
+	cap->cman_wred_context_shared_n_nodes_per_context_max = 0;
+	cap->cman_wred_context_shared_n_contexts_per_node_max = 0;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (level_id >= IAVF_TM_NODE_TYPE_MAX) {
+		error->type = RTE_TM_ERROR_TYPE_LEVEL_ID;
+		error->message = "too deep level";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (level_id == IAVF_TM_NODE_TYPE_PORT) {
+		cap->n_nodes_max = 1;
+		cap->n_nodes_nonleaf_max = 1;
+		cap->n_nodes_leaf_max = 0;
+	} else if (level_id == IAVF_TM_NODE_TYPE_TC) {
+		/* TC */
+		cap->n_nodes_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_nonleaf_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_leaf_max = 0;
+	} else {
+		/* queue */
+		cap->n_nodes_max = vf->num_queue_pairs;
+		cap->n_nodes_nonleaf_max = 0;
+		cap->n_nodes_leaf_max = vf->num_queue_pairs;
+	}
+
+	cap->non_leaf_nodes_identical = true;
+	cap->leaf_nodes_identical = true;
+
+	if (level_id != IAVF_TM_NODE_TYPE_QUEUE) {
+		cap->nonleaf.shaper_private_supported = true;
+		cap->nonleaf.shaper_private_dual_rate_supported = false;
+		cap->nonleaf.shaper_private_rate_min = 0;
+		/* GBps */
+		cap->nonleaf.shaper_private_rate_max =
+			vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+		cap->nonleaf.shaper_private_packet_mode_supported = 0;
+		cap->nonleaf.shaper_private_byte_mode_supported = 1;
+		cap->nonleaf.shaper_shared_n_max = 0;
+		cap->nonleaf.shaper_shared_packet_mode_supported = 0;
+		cap->nonleaf.shaper_shared_byte_mode_supported = 0;
+		if (level_id == IAVF_TM_NODE_TYPE_PORT)
+			cap->nonleaf.sched_n_children_max =
+				IAVF_MAX_TRAFFIC_CLASS;
+		else
+			cap->nonleaf.sched_n_children_max =
+				vf->num_queue_pairs;
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
+		cap->nonleaf.sched_wfq_n_groups_max = 0;
+		cap->nonleaf.sched_wfq_weight_max = 1;
+		cap->nonleaf.sched_wfq_packet_mode_supported = 0;
+		cap->nonleaf.sched_wfq_byte_mode_supported = 0;
+		cap->nonleaf.stats_mask = 0;
+
+		return 0;
+	}
+
+	/* queue node */
+	cap->leaf.shaper_private_supported = false;
+	cap->leaf.shaper_private_dual_rate_supported = false;
+	cap->leaf.shaper_private_rate_min = 0;
+	/* GBps */
+	cap->leaf.shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->leaf.shaper_private_packet_mode_supported = 0;
+	cap->leaf.shaper_private_byte_mode_supported = 1;
+	cap->leaf.shaper_shared_n_max = 0;
+	cap->leaf.shaper_shared_packet_mode_supported = 0;
+	cap->leaf.shaper_shared_byte_mode_supported = 0;
+	cap->leaf.cman_head_drop_supported = false;
+	cap->leaf.cman_wred_context_private_supported = true;
+	cap->leaf.cman_wred_context_shared_n_max = 0;
+	cap->leaf.stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_node_capabilities_get(struct rte_eth_dev *dev,
+			   uint32_t node_id,
+			   struct rte_tm_node_capabilities *cap,
+			   struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type;
+	struct virtchnl_qos_cap_elem tc_cap;
+	struct iavf_tm_node *tm_node;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "not support capability get";
+		return -EINVAL;
+	}
+
+	tc_cap = vf->qos_cap->cap[tm_node->tc];
+	if (tc_cap.tc_num != tm_node->tc) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "tc not match";
+		return -EINVAL;
+	}
+
+	cap->shaper_private_supported = true;
+	cap->shaper_private_dual_rate_supported = false;
+	cap->shaper_private_rate_min = tc_cap.shaper.committed;
+	cap->shaper_private_rate_max = tc_cap.shaper.peak;
+	cap->shaper_shared_n_max = 0;
+	cap->nonleaf.sched_n_children_max = vf->num_queue_pairs;
+	cap->nonleaf.sched_sp_n_priorities_max = 1;
+	cap->nonleaf.sched_wfq_n_children_per_group_max = 1;
+	cap->nonleaf.sched_wfq_n_groups_max = 0;
+	cap->nonleaf.sched_wfq_weight_max = tc_cap.weight;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct virtchnl_queue_tc_mapping *q_tc_mapping;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+	uint16_t size;
+	int index = 0, node_committed = 0;
+	int ret, i;
+
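+	/* one tc[] element is embedded in the struct, hence the "- 1" */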
+	size = sizeof(*q_tc_mapping) + sizeof(q_tc_mapping->tc[0]) *
+		(vf->qos_cap->num_elem - 1);
+	q_tc_mapping = rte_zmalloc("q_tc", size, 0);
+	if (!q_tc_mapping)
+		return IAVF_ERR_NO_MEMORY;
+
+	q_tc_mapping->vsi_id = vf->vsi.vsi_id;
+	q_tc_mapping->num_tc = vf->qos_cap->num_elem;
+	q_tc_mapping->num_queue_pairs = vf->num_queue_pairs;
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		if (tm_node->tc >= q_tc_mapping->num_tc) {
+			PMD_DRV_LOG(ERR, "TC%d is not enabled", tm_node->tc);
+			ret = IAVF_ERR_PARAM;
+			goto exit;
+		}
+		q_tc_mapping->tc[tm_node->tc].req.queue_count++;
+		node_committed++;
+	}
+
+	/* only the enabled TCs have valid entries in the mapping */
+	for (i = 0; i < q_tc_mapping->num_tc; i++) {
+		q_tc_mapping->tc[i].req.start_queue_id = index;
+		index += q_tc_mapping->tc[i].req.queue_count;
+	}
+	if (node_committed < vf->num_queue_pairs) {
+		PMD_DRV_LOG(ERR, "mapped queue nodes are fewer than the allocated queue pairs");
+		ret = IAVF_ERR_PARAM;
+		goto exit;
+	}
+
+	ret = iavf_set_q_tc_map(dev, q_tc_mapping, size);
+
+exit:
+	rte_free(q_tc_mapping);
+	return ret;
+}
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index 02e828f9b7..06dc663947 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -467,7 +467,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 		VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 		VIRTCHNL_VF_OFFLOAD_CRC |
 		VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-		VIRTCHNL_VF_LARGE_NUM_QPAIRS;
+		VIRTCHNL_VF_LARGE_NUM_QPAIRS |
+		VIRTCHNL_VF_OFFLOAD_QOS;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -1550,6 +1551,59 @@ iavf_set_hena(struct iavf_adapter *adapter, uint64_t hena)
 	return err;
 }
 
+int
+iavf_get_qos_cap(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	uint32_t len;
+	int err;
+
+	args.ops = VIRTCHNL_OP_GET_QOS_CAPS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+	err = iavf_execute_vf_cmd(adapter, &args);
+
+	if (err) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_GET_VF_RESOURCE");
+		return -1;
+	}
+
+	len = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+
+	rte_memcpy(vf->qos_cap, args.out_buffer,
+		   RTE_MIN(args.out_size, len));
+
+	return 0;
+}
+
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+		struct virtchnl_queue_tc_mapping *q_tc_mapping, uint16_t size)
+{
+	struct iavf_adapter *adapter =
+			IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_cmd_info args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.ops = VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP;
+	args.in_args = (uint8_t *)q_tc_mapping;
+	args.in_args_size = size;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	err = iavf_execute_vf_cmd(adapter, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "Failed to execute command of"
+			    " VIRTCHNL_OP_CONFIG_TC_MAP");
+	return err;
+}
+
 int
 iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			struct rte_ether_addr *mc_addrs,
diff --git a/drivers/net/iavf/meson.build b/drivers/net/iavf/meson.build
index 6f222a9e87..f2010a8337 100644
--- a/drivers/net/iavf/meson.build
+++ b/drivers/net/iavf/meson.build
@@ -19,6 +19,7 @@ sources = files(
         'iavf_generic_flow.c',
         'iavf_fdir.c',
         'iavf_hash.c',
+        'iavf_tm.c',
 )
 
 if arch_subdir == 'x86'
-- 
2.25.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF
  2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                   ` (6 preceding siblings ...)
  2021-06-25  9:31 ` [dpdk-dev] [PATCH v3 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
@ 2021-06-30  6:53 ` Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
                     ` (6 more replies)
  2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-07-01 11:41 ` Ting Xu
  9 siblings, 7 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-30  6:53 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch enables the ETS-based Tx QoS for IAVF. Kernel tool is used to
configure ETS first. DCF is used to set bandwidth limit for VFs of each
TC. IAVF is supported to query QoS capability and set queue TC mapping.
Traffic Management API is utilized to configure the QoS hierarchy
scheduler tree. The scheduler tree will be passed to hardware to enable
all above functions.
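
As an illustrative sketch (not part of this series): on the DCF side
the same rte_tm API builds the port/TC/VSI hierarchy, and a shaper
profile attached to a VSI node becomes the Tx bandwidth limit of that
VF. The ids below are assumptions; in this driver non-leaf node ids
must be at least 8 * num_vfs, a VSI node id is tc * num_vfs + vf_id,
and a complete setup would add a VSI node for every VF.

#include <string.h>
#include <rte_tm.h>

static int
dcf_limit_vf0_tx(uint16_t dcf_port, uint16_t num_vfs)
{
	struct rte_tm_shaper_params sp;
	struct rte_tm_node_params np;
	struct rte_tm_error err;
	uint32_t root_id = 8 * num_vfs;
	uint32_t tc0_id = root_id + 1;

	/* shaper profile 1: 1 Gbps peak rate, expressed in bytes per
	 * second as rte_tm requires (assumes at least a 1 Gbps link)
	 */
	memset(&sp, 0, sizeof(sp));
	sp.peak.rate = 1000000000 / 8;
	if (rte_tm_shaper_profile_add(dcf_port, 1, &sp, &err))
		return -1;

	/* port (level 0) and TC0 (level 1) nodes, no private shaper */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
	np.nonleaf.n_sp_priorities = 1;
	if (rte_tm_node_add(dcf_port, root_id, RTE_TM_NODE_ID_NULL,
			    0, 1, 0, &np, &err) ||
	    rte_tm_node_add(dcf_port, tc0_id, root_id, 0, 1, 1, &np, &err))
		return -1;

	/* VSI node of VF 0 under TC0 (id = 0 * num_vfs + 0), shaped */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = 1;
	np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
	if (rte_tm_node_add(dcf_port, 0, tc0_id, 0, 1, 2, &np, &err))
		return -1;

	/* the commit sends VIRTCHNL_OP_DCF_CONFIG_BW per VF and per TC */
	return rte_tm_hierarchy_commit(dcf_port, 1, &err);
}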

Ting Xu (7):
  common/iavf: support ETS-based QoS offload configuration
  net/ice/base: support DCF query port ETS adminq
  net/ice: support DCF link status event handling
  net/ice: support QoS config VF bandwidth in DCF
  net/iavf: query QoS cap and set queue TC mapping
  net/iavf: check Tx packet with correct UP and queue
  doc: release note for ETS-based Tx QoS

 doc/guides/rel_notes/release_21_08.rst |   7 +
 drivers/common/iavf/iavf_type.h        |   2 +
 drivers/common/iavf/virtchnl.h         | 131 +++++
 drivers/net/iavf/iavf.h                |  56 ++
 drivers/net/iavf/iavf_ethdev.c         |  34 ++
 drivers/net/iavf/iavf_rxtx.c           |  43 ++
 drivers/net/iavf/iavf_tm.c             | 727 +++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c          |  56 +-
 drivers/net/iavf/meson.build           |   1 +
 drivers/net/ice/base/ice_dcb.c         |   3 +-
 drivers/net/ice/ice_dcf.c              |   9 +-
 drivers/net/ice/ice_dcf.h              |  54 ++
 drivers/net/ice/ice_dcf_ethdev.c       |  68 ++-
 drivers/net/ice/ice_dcf_ethdev.h       |   3 +
 drivers/net/ice/ice_dcf_parent.c       |  81 +++
 drivers/net/ice/ice_dcf_sched.c        | 759 +++++++++++++++++++++++++
 drivers/net/ice/meson.build            |   3 +-
 17 files changed, 2030 insertions(+), 7 deletions(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 1/7] common/iavf: support ETS-based QoS offload configuration
  2021-06-30  6:53 ` [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
@ 2021-06-30  6:53   ` Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 2/7] net/ice/base: support DCF query port ETS adminq Ting Xu
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-30  6:53 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch adds new virtchnl opcodes and structures for QoS
configuration, which includes:
1. VIRTCHNL_VF_OFFLOAD_QOS, to negotiate the capability supporting QoS
configuration. If both the VF and the PF have this flag, the ETS-based
QoS offload function is supported.
2. VIRTCHNL_OP_DCF_CONFIG_BW, with which the DCF configures the min and
max bandwidth for each VF per enabled TC. To make the VSI node
bandwidth configuration work, the DCF also needs to configure the TC
node bandwidth directly.
3. VIRTCHNL_OP_GET_QOS_CAPS, with which the VF queries the current QoS
configuration, such as the enabled TCs, the arbiter type, up2tc and the
bandwidth of the VSI node. The configuration is previously set by DCB
and the DCF, and is now the potential QoS capability of the VF. The VF
can take it as a reference to configure the queue TC mapping.
4. VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP, to set the VF queue to TC mapping
for all Tx and Rx queues. Queues mapping to one TC should be contiguous
and all allocated queues should be mapped (see the sketch after this
list).
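
Below is an illustrative sketch (not part of this patch) of how a
sender might build the variable-length queue TC mapping message. It
relies only on the structures introduced here; the even queue split
and the helper name are arbitrary choices, and num_tc must be at
least 1.

#include <stdint.h>
#include <stdlib.h>
#include <virtchnl.h>	/* struct virtchnl_queue_tc_mapping */

static struct virtchnl_queue_tc_mapping *
build_q_tc_map(uint16_t vsi_id, uint16_t num_tc, uint16_t num_queues,
	       uint16_t *msg_len)
{
	struct virtchnl_queue_tc_mapping *map;
	uint16_t len, i, next_q = 0;

	/* tc[1] acts as a flexible array member, hence the "- 1" */
	len = sizeof(*map) + (num_tc - 1) * sizeof(map->tc[0]);
	map = calloc(1, len);
	if (!map)
		return NULL;

	map->vsi_id = vsi_id;
	map->num_tc = num_tc;
	map->num_queue_pairs = num_queues;

	/* contiguous, evenly sized queue ranges, one per TC */
	for (i = 0; i < num_tc; i++) {
		map->tc[i].req.start_queue_id = next_q;
		map->tc[i].req.queue_count = num_queues / num_tc;
		next_q += map->tc[i].req.queue_count;
	}
	/* give any remainder to the last TC so every queue is mapped */
	map->tc[num_tc - 1].req.queue_count += num_queues % num_tc;

	*msg_len = len;
	return map;
}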

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/common/iavf/iavf_type.h |   2 +
 drivers/common/iavf/virtchnl.h  | 131 ++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)

diff --git a/drivers/common/iavf/iavf_type.h b/drivers/common/iavf/iavf_type.h
index f3815d523b..73dfb47e70 100644
--- a/drivers/common/iavf/iavf_type.h
+++ b/drivers/common/iavf/iavf_type.h
@@ -141,6 +141,8 @@ enum iavf_debug_mask {
 #define IAVF_PHY_LED_MODE_MASK			0xFFFF
 #define IAVF_PHY_LED_MODE_ORIG			0x80000000
 
+#define IAVF_MAX_TRAFFIC_CLASS	8
+
 /* Memory types */
 enum iavf_memset_type {
 	IAVF_NONDMA_MEM = 0,
diff --git a/drivers/common/iavf/virtchnl.h b/drivers/common/iavf/virtchnl.h
index 197edce8a1..1cf0866124 100644
--- a/drivers/common/iavf/virtchnl.h
+++ b/drivers/common/iavf/virtchnl.h
@@ -85,6 +85,10 @@ enum virtchnl_rx_hsplit {
 	VIRTCHNL_RX_HSPLIT_SPLIT_SCTP    = 8,
 };
 
+enum virtchnl_bw_limit_type {
+	VIRTCHNL_BW_SHAPER = 0,
+};
+
 #define VIRTCHNL_ETH_LENGTH_OF_ADDRESS	6
 /* END GENERIC DEFINES */
 
@@ -130,6 +134,7 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
 	VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
-	/* opcodes 34, 35, 36, and 37 are reserved */
+	/* opcodes 34, 35 and 36 are reserved */
+	VIRTCHNL_OP_DCF_CONFIG_BW = 37,
 	VIRTCHNL_OP_DCF_VLAN_OFFLOAD = 38,
 	VIRTCHNL_OP_DCF_CMD_DESC = 39,
 	VIRTCHNL_OP_DCF_CMD_BUFF = 40,
@@ -152,6 +157,8 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57,
 	VIRTCHNL_OP_ENABLE_VLAN_FILTERING_V2 = 58,
 	VIRTCHNL_OP_DISABLE_VLAN_FILTERING_V2 = 59,
+	VIRTCHNL_OP_GET_QOS_CAPS = 66,
+	VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP = 67,
 	VIRTCHNL_OP_ENABLE_QUEUES_V2 = 107,
 	VIRTCHNL_OP_DISABLE_QUEUES_V2 = 108,
 	VIRTCHNL_OP_MAP_QUEUE_VECTOR = 111,
@@ -398,6 +405,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC	BIT(26)
 #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF		BIT(27)
 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF		BIT(28)
+#define VIRTCHNL_VF_OFFLOAD_QOS		BIT(29)
 #define VIRTCHNL_VF_CAP_DCF			BIT(30)
 	/* BIT(31) is reserved */
 
@@ -1285,6 +1293,14 @@ struct virtchnl_filter {
 
 VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
 
+struct virtchnl_shaper_bw {
+	/* Unit is Kbps */
+	u32 committed;
+	u32 peak;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw);
+
 /* VIRTCHNL_OP_DCF_GET_VSI_MAP
  * VF sends this message to get VSI mapping table.
  * PF responds with an indirect message containing VF's
@@ -1357,6 +1373,37 @@ struct virtchnl_dcf_vlan_offload {
 
 VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_dcf_vlan_offload);
 
+struct virtchnl_dcf_bw_cfg {
+	u8 tc_num;
+#define VIRTCHNL_DCF_BW_CIR		BIT(0)
+#define VIRTCHNL_DCF_BW_PIR		BIT(1)
+	u8 bw_type;
+	u8 pad[2];
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_dcf_bw_cfg);
+
+/* VIRTCHNL_OP_DCF_CONFIG_BW
+ * The DCF sends this message to set the bandwidth configuration of
+ * each TC for a specific VF id. The field node_type indicates whether
+ * this message configures the VSI node or the TC node bandwidth.
+ */
+struct virtchnl_dcf_bw_cfg_list {
+	u16 vf_id;
+	u8 num_elem;
+#define VIRTCHNL_DCF_TARGET_TC_BW	0
+#define VIRTCHNL_DCF_TARGET_VF_BW	1
+	u8 node_type;
+	struct virtchnl_dcf_bw_cfg cfg[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_dcf_bw_cfg_list);
+
 struct virtchnl_supported_rxdids {
 	/* see enum virtchnl_rx_desc_id_bitmasks */
 	u64 supported_rxdids;
@@ -1768,6 +1815,62 @@ struct virtchnl_fdir_del {
 
 VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
 
+/* VIRTCHNL_OP_GET_QOS_CAPS
+ * VF sends this message to get its QoS capabilities, such as
+ * TC number, arbiter and bandwidth.
+ */
+struct virtchnl_qos_cap_elem {
+	u8 tc_num;
+	u8 tc_prio;
+#define VIRTCHNL_ABITER_STRICT      0
+#define VIRTCHNL_ABITER_ETS         2
+	u8 arbiter;
+#define VIRTCHNL_STRICT_WEIGHT      1
+	u8 weight;
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem);
+
+struct virtchnl_qos_cap_list {
+	u16 vsi_id;
+	u16 num_elem;
+	struct virtchnl_qos_cap_elem cap[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_qos_cap_list);
+
+/* VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP
+ * VF sends the message virtchnl_queue_tc_mapping to set the queue-to-TC
+ * mapping for all the Tx and Rx queues of a specified VSI, and gets a
+ * response with the bitmap of valid user priorities associated with
+ * the queues.
+ */
+struct virtchnl_queue_tc_mapping {
+	u16 vsi_id;
+	u16 num_tc;
+	u16 num_queue_pairs;
+	u8 pad[2];
+	union {
+		struct {
+			u16 start_queue_id;
+			u16 queue_count;
+		} req;
+		struct {
+#define VIRTCHNL_USER_PRIO_TYPE_UP	0
+#define VIRTCHNL_USER_PRIO_TYPE_DSCP	1
+			u16 prio_type;
+			u16 valid_prio_bitmap;
+		} resp;
+	} tc[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_tc_mapping);
+
 /* VIRTCHNL_OP_QUERY_FDIR_FILTER
  * VF sends this request to PF by filling out vsi_id,
  * flow_id and reset_counter. PF will return query_info
@@ -2118,6 +2221,19 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DCF_GET_VSI_MAP:
 	case VIRTCHNL_OP_DCF_GET_PKG_INFO:
 		break;
+	case VIRTCHNL_OP_DCF_CONFIG_BW:
+		valid_len = sizeof(struct virtchnl_dcf_bw_cfg_list);
+		if (msglen >= valid_len) {
+			struct virtchnl_dcf_bw_cfg_list *cfg_list =
+				(struct virtchnl_dcf_bw_cfg_list *)msg;
+			if (cfg_list->num_elem == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (cfg_list->num_elem - 1) *
+					 sizeof(struct virtchnl_dcf_bw_cfg);
+		}
+		break;
 	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		break;
 	case VIRTCHNL_OP_ADD_RSS_CFG:
@@ -2133,6 +2249,21 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_QUERY_FDIR_FILTER:
 		valid_len = sizeof(struct virtchnl_fdir_query);
 		break;
+	case VIRTCHNL_OP_GET_QOS_CAPS:
+		break;
+	case VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP:
+		valid_len = sizeof(struct virtchnl_queue_tc_mapping);
+		if (msglen >= valid_len) {
+			struct virtchnl_queue_tc_mapping *q_tc =
+				(struct virtchnl_queue_tc_mapping *)msg;
+			if (q_tc->num_tc == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (q_tc->num_tc - 1) *
+					 sizeof(q_tc->tc[0]);
+		}
+		break;
 	case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
 		break;
 	case VIRTCHNL_OP_ADD_VLAN_V2:
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 2/7] net/ice/base: support DCF query port ETS adminq
  2021-06-30  6:53 ` [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
@ 2021-06-30  6:53   ` Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 3/7] net/ice: support DCF link status event handling Ting Xu
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-30  6:53 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

In the adminq command query port ETS function, the root node TEID is
needed. However, for DCF, the root node is not initialized, which
causes an error when the variable is dereferenced. This patch first
checks whether the root node is available.

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/base/ice_dcb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ice/base/ice_dcb.c b/drivers/net/ice/base/ice_dcb.c
index c73fc095ff..9c9675f6ef 100644
--- a/drivers/net/ice/base/ice_dcb.c
+++ b/drivers/net/ice/base/ice_dcb.c
@@ -1524,7 +1524,8 @@ ice_aq_query_port_ets(struct ice_port_info *pi,
 		return ICE_ERR_PARAM;
 	cmd = &desc.params.port_ets;
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
-	cmd->port_teid = pi->root->info.node_teid;
+	if (pi->root)
+		cmd->port_teid = pi->root->info.node_teid;
 
 	status = ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
 	return status;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 3/7] net/ice: support DCF link status event handling
  2021-06-30  6:53 ` [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 2/7] net/ice/base: support DCF query port ETS adminq Ting Xu
@ 2021-06-30  6:53   ` Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 4/7] net/ice: support QoS config VF bandwidth in DCF Ting Xu
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-30  6:53 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

When the link status changes, DCF receives a virtchnl PF event message.
Add support to handle this event, change the link status and update the
link info.
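
For reference, an application can consume the resulting LSC event by
registering a callback through the public ethdev API. A minimal sketch
(only the rte_* calls are real API; the rest is illustrative):

    #include <stdio.h>
    #include <rte_ethdev.h>

    static int
    lsc_event_cb(uint16_t port_id, enum rte_eth_event_type type,
                 void *cb_arg, void *ret_param)
    {
            struct rte_eth_link link;

            (void)type;
            (void)cb_arg;
            (void)ret_param;

            /* no wait: the DCF PMD only reads the status cached by
             * the LINK_CHANGE event handler added in this patch */
            rte_eth_link_get_nowait(port_id, &link);
            printf("port %u link %s, speed %u Mbps\n", port_id,
                   link.link_status ? "up" : "down", link.link_speed);
            return 0;
    }

    /* at init time: */
    rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
                                  lsc_event_cb, NULL);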

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.h        |  6 ++++
 drivers/net/ice/ice_dcf_ethdev.c | 54 ++++++++++++++++++++++++++++++--
 drivers/net/ice/ice_dcf_parent.c | 51 ++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 0cb90b5e9f..587093b909 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -60,6 +60,10 @@ struct ice_dcf_hw {
 	uint16_t nb_msix;
 	uint16_t rxq_map[16];
 	struct virtchnl_eth_stats eth_stats_offset;
+
+	/* Link status */
+	bool link_up;
+	uint32_t link_speed;
 };
 
 int ice_dcf_execute_virtchnl_cmd(struct ice_dcf_hw *hw,
@@ -77,5 +81,7 @@ int ice_dcf_disable_queues(struct ice_dcf_hw *hw);
 int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 			struct virtchnl_eth_stats *pstats);
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
+int ice_dcf_link_update(struct rte_eth_dev *dev,
+		    __rte_unused int wait_to_complete);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index f73dc80bd9..0b40ebbec6 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -881,11 +881,59 @@ ice_dcf_dev_close(struct rte_eth_dev *dev)
 	return 0;
 }
 
-static int
-ice_dcf_link_update(__rte_unused struct rte_eth_dev *dev,
+int
+ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete)
 {
-	return 0;
+	struct ice_dcf_adapter *ad = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &ad->real_hw;
+	struct rte_eth_link new_link;
+
+	memset(&new_link, 0, sizeof(new_link));
+
+	/* Only read the status info stored in the VF; the info is updated
+	 * when a LINK_CHANGE event is received from the PF via virtchnl.
+	 */
+	switch (hw->link_speed) {
+	case 10:
+		new_link.link_speed = ETH_SPEED_NUM_10M;
+		break;
+	case 100:
+		new_link.link_speed = ETH_SPEED_NUM_100M;
+		break;
+	case 1000:
+		new_link.link_speed = ETH_SPEED_NUM_1G;
+		break;
+	case 10000:
+		new_link.link_speed = ETH_SPEED_NUM_10G;
+		break;
+	case 20000:
+		new_link.link_speed = ETH_SPEED_NUM_20G;
+		break;
+	case 25000:
+		new_link.link_speed = ETH_SPEED_NUM_25G;
+		break;
+	case 40000:
+		new_link.link_speed = ETH_SPEED_NUM_40G;
+		break;
+	case 50000:
+		new_link.link_speed = ETH_SPEED_NUM_50G;
+		break;
+	case 100000:
+		new_link.link_speed = ETH_SPEED_NUM_100G;
+		break;
+	default:
+		new_link.link_speed = ETH_SPEED_NUM_NONE;
+		break;
+	}
+
+	new_link.link_duplex = ETH_LINK_FULL_DUPLEX;
+	new_link.link_status = hw->link_up ? ETH_LINK_UP :
+					     ETH_LINK_DOWN;
+	new_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+				ETH_LINK_SPEED_FIXED);
+
+	return rte_eth_linkstatus_set(dev, &new_link);
 }
 
 /* Add UDP tunneling port */
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 19420a0f58..788f6dd2a0 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -178,6 +178,44 @@ start_vsi_reset_thread(struct ice_dcf_hw *dcf_hw, bool vfr, uint16_t vf_id)
 	}
 }
 
+static uint32_t
+ice_dcf_convert_link_speed(enum virtchnl_link_speed virt_link_speed)
+{
+	uint32_t speed;
+
+	switch (virt_link_speed) {
+	case VIRTCHNL_LINK_SPEED_100MB:
+		speed = 100;
+		break;
+	case VIRTCHNL_LINK_SPEED_1GB:
+		speed = 1000;
+		break;
+	case VIRTCHNL_LINK_SPEED_10GB:
+		speed = 10000;
+		break;
+	case VIRTCHNL_LINK_SPEED_40GB:
+		speed = 40000;
+		break;
+	case VIRTCHNL_LINK_SPEED_20GB:
+		speed = 20000;
+		break;
+	case VIRTCHNL_LINK_SPEED_25GB:
+		speed = 25000;
+		break;
+	case VIRTCHNL_LINK_SPEED_2_5GB:
+		speed = 2500;
+		break;
+	case VIRTCHNL_LINK_SPEED_5GB:
+		speed = 5000;
+		break;
+	default:
+		speed = 0;
+		break;
+	}
+
+	return speed;
+}
+
 void
 ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 			    uint8_t *msg, uint16_t msglen)
@@ -196,6 +234,19 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 		break;
 	case VIRTCHNL_EVENT_LINK_CHANGE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event");
+		dcf_hw->link_up = pf_msg->event_data.link_event.link_status;
+		if (dcf_hw->vf_res->vf_cap_flags &
+			VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+			dcf_hw->link_speed =
+				pf_msg->event_data.link_event_adv.link_speed;
+		} else {
+			enum virtchnl_link_speed speed;
+			speed = pf_msg->event_data.link_event.link_speed;
+			dcf_hw->link_speed = ice_dcf_convert_link_speed(speed);
+		}
+		ice_dcf_link_update(dcf_hw->eth_dev, 0);
+		rte_eth_dev_callback_process(dcf_hw->eth_dev,
+			RTE_ETH_EVENT_INTR_LSC, NULL);
 		break;
 	case VIRTCHNL_EVENT_PF_DRIVER_CLOSE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event");
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 4/7] net/ice: support QoS config VF bandwidth in DCF
  2021-06-30  6:53 ` [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (2 preceding siblings ...)
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 3/7] net/ice: support DCF link status event handling Ting Xu
@ 2021-06-30  6:53   ` Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 5/7] net/iavf: query QoS cap and set queue TC mapping Ting Xu
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-30  6:53 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch supports the ETS-based QoS configuration. It enables the DCF
to configure bandwidth limits for each VF VSI across different TCs. A
hierarchical scheduler tree is built with port, TC and VSI nodes.
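
As a usage sketch (not part of this patch), the tree can be built and
committed through the generic rte_tm API. The node IDs follow the
expectations of this patch: leaf (VSI) IDs are tc * num_vfs + vf_id and
stay below 8 * num_vfs, while port and TC IDs sit above that range;
num_tc and num_vfs are assumed to match the actual DCB and VF
configuration:

    #include <string.h>
    #include <rte_tm.h>

    static int
    dcf_build_qos_tree(uint16_t port_id, uint32_t num_tc, uint32_t num_vfs)
    {
            uint32_t base = 8 * num_vfs; /* first non-leaf node id */
            struct rte_tm_node_params np, lp;
            struct rte_tm_error err;
            uint32_t tc, vf;

            /* non-leaf params: single SP priority, no WFQ, no shaper */
            memset(&np, 0, sizeof(np));
            np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
            np.nonleaf.n_sp_priorities = 1;

            /* leaf params: no congestion management, no WRED */
            memset(&lp, 0, sizeof(lp));
            lp.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
            lp.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;

            /* port (root) node: no parent, level 0 */
            if (rte_tm_node_add(port_id, base, RTE_TM_NODE_ID_NULL,
                                0, 1, 0, &np, &err))
                    return -1;

            /* one node per enabled TC under the port node, level 1 */
            for (tc = 0; tc < num_tc; tc++)
                    if (rte_tm_node_add(port_id, base + 1 + tc, base,
                                        0, 1, 1, &np, &err))
                            return -1;

            /* one VSI node per VF under each TC, id = tc * num_vfs + vf */
            for (tc = 0; tc < num_tc; tc++)
                    for (vf = 0; vf < num_vfs; vf++)
                            if (rte_tm_node_add(port_id,
                                                tc * num_vfs + vf,
                                                base + 1 + tc, 0, 1, 2,
                                                &lp, &err))
                                    return -1;

            /* push the whole tree to hardware */
            return rte_tm_hierarchy_commit(port_id, 1, &err);
    }

A shaper profile created with rte_tm_shaper_profile_add() can also be
attached to the VSI nodes to set per-VF-per-TC bandwidth limits, which
hierarchy_commit then converts to Kbps and sends via
VIRTCHNL_OP_DCF_CONFIG_BW.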

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.c        |   9 +-
 drivers/net/ice/ice_dcf.h        |  48 ++
 drivers/net/ice/ice_dcf_ethdev.c |  14 +
 drivers/net/ice/ice_dcf_ethdev.h |   3 +
 drivers/net/ice/ice_dcf_parent.c |  30 ++
 drivers/net/ice/ice_dcf_sched.c  | 759 +++++++++++++++++++++++++++++++
 drivers/net/ice/meson.build      |   3 +-
 7 files changed, 864 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index d72a6f357e..349d23ee4f 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -235,7 +235,8 @@ ice_dcf_get_vf_resource(struct ice_dcf_hw *hw)
 	caps = VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | VIRTCHNL_VF_OFFLOAD_RX_POLLING |
 	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED | VIRTCHNL_VF_CAP_DCF |
 	       VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
+	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC |
+	       VIRTCHNL_VF_OFFLOAD_QOS;
 
 	err = ice_dcf_send_cmd_req_no_irq(hw, VIRTCHNL_OP_GET_VF_RESOURCES,
 					  (uint8_t *)&caps, sizeof(caps));
@@ -668,6 +669,9 @@ ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 		}
 	}
 
+	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+		ice_dcf_tm_conf_init(eth_dev);
+
 	hw->eth_dev = eth_dev;
 	rte_intr_callback_register(&pci_dev->intr_handle,
 				   ice_dcf_dev_interrupt_handler, hw);
@@ -703,6 +707,9 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 	ice_dcf_mode_disable(hw);
 	iavf_shutdown_adminq(&hw->avf);
 
+	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+		ice_dcf_tm_conf_uninit(eth_dev);
+
 	rte_free(hw->arq_buf);
 	rte_free(hw->vf_vsi_map);
 	rte_free(hw->vf_res);
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 587093b909..1c7653de3d 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -6,6 +6,7 @@
 #define _ICE_DCF_H_
 
 #include <ethdev_driver.h>
+#include <rte_tm_driver.h>
 
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
@@ -30,6 +31,49 @@ struct dcf_virtchnl_cmd {
 	volatile int pending;
 };
 
+struct ice_dcf_tm_shaper_profile {
+	TAILQ_ENTRY(ice_dcf_tm_shaper_profile) node;
+	uint32_t shaper_profile_id;
+	uint32_t reference_count;
+	struct rte_tm_shaper_params profile;
+};
+
+TAILQ_HEAD(ice_dcf_shaper_profile_list, ice_dcf_tm_shaper_profile);
+
+/* Struct to store Traffic Manager node configuration. */
+struct ice_dcf_tm_node {
+	TAILQ_ENTRY(ice_dcf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct ice_dcf_tm_node *parent;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(ice_dcf_tm_node_list, ice_dcf_tm_node);
+
+/* node type of Traffic Manager */
+enum ice_dcf_tm_node_type {
+	ICE_DCF_TM_NODE_TYPE_PORT,
+	ICE_DCF_TM_NODE_TYPE_TC,
+	ICE_DCF_TM_NODE_TYPE_VSI,
+	ICE_DCF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct ice_dcf_tm_conf {
+	struct ice_dcf_shaper_profile_list shaper_profile_list;
+	struct ice_dcf_tm_node *root; /* root node - port */
+	struct ice_dcf_tm_node_list tc_list; /* node list for all the TCs */
+	struct ice_dcf_tm_node_list vsi_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_vsi_node;
+	bool committed;
+};
+
 struct ice_dcf_hw {
 	struct iavf_hw avf;
 
@@ -45,6 +89,8 @@ struct ice_dcf_hw {
 	uint16_t *vf_vsi_map;
 	uint16_t pf_vsi_id;
 
+	struct ice_dcf_tm_conf tm_conf;
+	struct ice_aqc_port_ets_elem *ets_config;
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
@@ -83,5 +129,7 @@ int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
 int ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete);
+void ice_dcf_tm_conf_init(struct rte_eth_dev *dev);
+void ice_dcf_tm_conf_uninit(struct rte_eth_dev *dev);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 0b40ebbec6..69fe6e63d1 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -622,6 +622,7 @@ ice_dcf_dev_stop(struct rte_eth_dev *dev)
 	ice_dcf_add_del_all_mac_addr(&dcf_ad->real_hw, false);
 	dev->data->dev_link.link_status = ETH_LINK_DOWN;
 	ad->pf.adapter_stopped = 1;
+	dcf_ad->real_hw.tm_conf.committed = false;
 
 	return 0;
 }
@@ -994,6 +995,18 @@ ice_dcf_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
 	return ret;
 }
 
+static int
+ice_dcf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+		void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &ice_dcf_tm_ops;
+
+	return 0;
+}
+
 static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.dev_start               = ice_dcf_dev_start,
 	.dev_stop                = ice_dcf_dev_stop,
@@ -1018,6 +1031,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.flow_ops_get            = ice_dcf_dev_flow_ops_get,
 	.udp_tunnel_port_add	 = ice_dcf_dev_udp_tunnel_port_add,
 	.udp_tunnel_port_del	 = ice_dcf_dev_udp_tunnel_port_del,
+	.tm_ops_get              = ice_dcf_tm_ops_get,
 };
 
 static int
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index e7c9d7fe41..8510e37119 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -7,6 +7,8 @@
 
 #include "base/ice_common.h"
 #include "base/ice_adminq_cmd.h"
+#include "base/ice_dcb.h"
+#include "base/ice_sched.h"
 
 #include "ice_ethdev.h"
 #include "ice_dcf.h"
@@ -52,6 +54,7 @@ struct ice_dcf_vf_repr {
 	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN */
 };
 
+extern const struct rte_tm_ops ice_dcf_tm_ops;
 void ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 				 uint8_t *msg, uint16_t msglen);
 int ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 788f6dd2a0..0ea32cf8e9 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -264,6 +264,29 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 	}
 }
 
+static int
+ice_dcf_query_port_ets(struct ice_hw *parent_hw, struct ice_dcf_hw *real_hw)
+{
+	int ret;
+
+	real_hw->ets_config = (struct ice_aqc_port_ets_elem *)
+			ice_malloc(real_hw, sizeof(*real_hw->ets_config));
+	if (!real_hw->ets_config)
+		return ICE_ERR_NO_MEMORY;
+
+	ret = ice_aq_query_port_ets(parent_hw->port_info,
+			real_hw->ets_config, sizeof(*real_hw->ets_config),
+			NULL);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "DCF Query Port ETS failed");
+		rte_free(real_hw->ets_config);
+		real_hw->ets_config = NULL;
+		return ret;
+	}
+
+	return ICE_SUCCESS;
+}
+
 static int
 ice_dcf_init_parent_hw(struct ice_hw *hw)
 {
@@ -486,6 +509,13 @@ ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev)
 		return err;
 	}
 
+	err = ice_dcf_query_port_ets(parent_hw, hw);
+	if (err) {
+		PMD_INIT_LOG(ERR, "failed to query port ets with error %d",
+			     err);
+		goto uninit_hw;
+	}
+
 	err = ice_dcf_load_pkg(parent_hw);
 	if (err) {
 		PMD_INIT_LOG(ERR, "failed to load package with error %d",
diff --git a/drivers/net/ice/ice_dcf_sched.c b/drivers/net/ice/ice_dcf_sched.c
new file mode 100644
index 0000000000..5c82a1a559
--- /dev/null
+++ b/drivers/net/ice/ice_dcf_sched.c
@@ -0,0 +1,759 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "base/ice_sched.h"
+#include "ice_dcf_ethdev.h"
+
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+				   uint32_t shaper_profile_id,
+				   struct rte_tm_error *error);
+
+const struct rte_tm_ops ice_dcf_tm_ops = {
+	.shaper_profile_add = ice_dcf_shaper_profile_add,
+	.shaper_profile_delete = ice_dcf_shaper_profile_del,
+	.hierarchy_commit = ice_dcf_hierarchy_commit,
+	.node_add = ice_dcf_node_add,
+	.node_delete = ice_dcf_node_delete,
+};
+
+void
+ice_dcf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+
+	/* initialize shaper profile list */
+	TAILQ_INIT(&hw->tm_conf.shaper_profile_list);
+
+	/* initialize node configuration */
+	hw->tm_conf.root = NULL;
+	TAILQ_INIT(&hw->tm_conf.tc_list);
+	TAILQ_INIT(&hw->tm_conf.vsi_list);
+	hw->tm_conf.nb_tc_node = 0;
+	hw->tm_conf.nb_vsi_node = 0;
+	hw->tm_conf.committed = false;
+}
+
+void
+ice_dcf_tm_conf_uninit(struct rte_eth_dev *dev)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	struct ice_dcf_tm_node *tm_node;
+
+	/* clear node configuration */
+	while ((tm_node = TAILQ_FIRST(&hw->tm_conf.vsi_list))) {
+		TAILQ_REMOVE(&hw->tm_conf.vsi_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	hw->tm_conf.nb_vsi_node = 0;
+	while ((tm_node = TAILQ_FIRST(&hw->tm_conf.tc_list))) {
+		TAILQ_REMOVE(&hw->tm_conf.tc_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	hw->tm_conf.nb_tc_node = 0;
+	if (hw->tm_conf.root) {
+		rte_free(hw->tm_conf.root);
+		hw->tm_conf.root = NULL;
+	}
+
+	/* Remove all shaper profiles */
+	while ((shaper_profile =
+	       TAILQ_FIRST(&hw->tm_conf.shaper_profile_list))) {
+		TAILQ_REMOVE(&hw->tm_conf.shaper_profile_list,
+			     shaper_profile, node);
+		rte_free(shaper_profile);
+	}
+}
+
+static inline struct ice_dcf_tm_node *
+ice_dcf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum ice_dcf_tm_node_type *node_type)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct ice_dcf_tm_node_list *tc_list = &hw->tm_conf.tc_list;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (hw->tm_conf.root && hw->tm_conf.root->id == node_id) {
+		*node_type = ICE_DCF_TM_NODE_TYPE_PORT;
+		return hw->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, vsi_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_VSI;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static inline struct ice_dcf_tm_shaper_profile *
+ice_dcf_shaper_profile_search(struct rte_eth_dev *dev,
+			   uint32_t shaper_profile_id)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_shaper_profile_list *shaper_profile_list =
+		&hw->tm_conf.shaper_profile_list;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	TAILQ_FOREACH(shaper_profile, shaper_profile_list, node) {
+		if (shaper_profile_id == shaper_profile->shaper_profile_id)
+			return shaper_profile;
+	}
+
+	return NULL;
+}
+
+static int
+ice_dcf_node_param_check(struct ice_dcf_hw *hw, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* shared shaper is not supported */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= 8 * hw->num_vfs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type parent_node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_tm_shaper_profile *shaper_profile = NULL;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *parent_node;
+	struct ice_dcf_tm_node *tm_node;
+	uint16_t tc_nb = 1;
+	int i, ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = ice_dcf_node_param_check(hw, node_id, priority, weight,
+				   params, error);
+	if (ret)
+		return ret;
+
+	for (i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		if (hw->ets_config->tc_valid_bits & (1 << i))
+			tc_nb++;
+	}
+
+	/* check if the node already exists */
+	if (ice_dcf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* check the shaper profile id */
+	if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE) {
+		shaper_profile = ice_dcf_shaper_profile_search(dev,
+			params->shaper_profile_id);
+		if (!shaper_profile) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+			error->message = "shaper profile not exist";
+			return -EINVAL;
+		}
+	}
+
+	/* add the root node if it has no parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != ICE_DCF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (hw->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("ice_dcf_tm_node",
+				      sizeof(struct ice_dcf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		hw->tm_conf.root = tm_node;
+
+		return 0;
+	}
+
+	/* TC or vsi node */
+	/* check the parent node */
+	parent_node = ice_dcf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != ICE_DCF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != ICE_DCF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not port or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the TC node number */
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (hw->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the vsi node number */
+		if (parent_node->reference_count >= hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many VSI for one TC";
+			return -EINVAL;
+		}
+		/* check the vsi node id */
+		if (node_id > tc_nb * hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large VSI id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or vsi node */
+	tm_node = rte_zmalloc("ice_dcf_tm_node",
+			      sizeof(struct ice_dcf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->priority = priority;
+	tm_node->weight = weight;
+	tm_node->shaper_profile = shaper_profile;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = hw->tm_conf.nb_tc_node;
+		hw->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.vsi_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		hw->tm_conf.nb_vsi_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	/* increase the reference counter of the shaper profile */
+	if (shaper_profile)
+		shaper_profile->reference_count++;
+
+	return 0;
+}
+
+static int
+ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = ice_dcf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		if (tm_node->shaper_profile)
+			tm_node->shaper_profile->reference_count--;
+		rte_free(tm_node);
+		hw->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or VSI node */
+	if (tm_node->shaper_profile)
+		tm_node->shaper_profile->reference_count--;
+	tm_node->parent->reference_count--;
+	if (node_type == ICE_DCF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&hw->tm_conf.tc_list, tm_node, node);
+		hw->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&hw->tm_conf.vsi_list, tm_node, node);
+		hw->tm_conf.nb_vsi_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_param_check(struct rte_tm_shaper_params *profile,
+				struct rte_tm_error *error)
+{
+	/* min bucket size not supported */
+	if (profile->committed.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE;
+		error->message = "committed bucket size not supported";
+		return -EINVAL;
+	}
+	/* max bucket size not supported */
+	if (profile->peak.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE;
+		error->message = "peak bucket size not supported";
+		return -EINVAL;
+	}
+	/* length adjustment not supported */
+	if (profile->pkt_length_adjust) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN;
+		error->message = "packet length adjustment not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	int ret;
+
+	if (!profile || !error)
+		return -EINVAL;
+
+	ret = ice_dcf_shaper_profile_param_check(profile, error);
+	if (ret)
+		return ret;
+
+	shaper_profile = ice_dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID exist";
+		return -EINVAL;
+	}
+
+	shaper_profile = rte_zmalloc("ice_dcf_tm_shaper_profile",
+				     sizeof(struct ice_dcf_tm_shaper_profile),
+				     0);
+	if (!shaper_profile)
+		return -ENOMEM;
+	shaper_profile->shaper_profile_id = shaper_profile_id;
+	rte_memcpy(&shaper_profile->profile, profile,
+			 sizeof(struct rte_tm_shaper_params));
+	TAILQ_INSERT_TAIL(&hw->tm_conf.shaper_profile_list,
+			  shaper_profile, node);
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	if (!error)
+		return -EINVAL;
+
+	shaper_profile = ice_dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (!shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID not exist";
+		return -EINVAL;
+	}
+
+	/* don't delete a profile if it's used by one or several nodes */
+	if (shaper_profile->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE;
+		error->message = "profile in use";
+		return -EINVAL;
+	}
+
+	TAILQ_REMOVE(&hw->tm_conf.shaper_profile_list, shaper_profile, node);
+	rte_free(shaper_profile);
+
+	return 0;
+}
+
+static int
+ice_dcf_set_vf_bw(struct ice_dcf_hw *hw,
+			struct virtchnl_dcf_bw_cfg_list *vf_bw,
+			uint16_t len)
+{
+	struct dcf_virtchnl_cmd args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.v_op = VIRTCHNL_OP_DCF_CONFIG_BW;
+	args.req_msg = (uint8_t *)vf_bw;
+	args.req_msglen  = len;
+	err = ice_dcf_execute_virtchnl_cmd(hw, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "fail to execute command %s",
+			    "VIRTCHNL_OP_DCF_CONFIG_BW");
+	return err;
+}
+
+static int
+ice_dcf_validate_tc_bw(struct virtchnl_dcf_bw_cfg_list *tc_bw,
+			uint32_t port_bw)
+{
+	struct virtchnl_dcf_bw_cfg *cfg;
+	bool lowest_cir_mark = false;
+	u32 total_peak, rest_peak;
+	u32 committed, peak;
+	int i;
+
+	total_peak = 0;
+	for (i = 0; i < tc_bw->num_elem; i++)
+		total_peak += tc_bw->cfg[i].shaper.peak;
+
+	for (i = 0; i < tc_bw->num_elem; i++) {
+		cfg = &tc_bw->cfg[i];
+		peak = cfg->shaper.peak;
+		committed = cfg->shaper.committed;
+		rest_peak = total_peak - peak;
+
+		if (lowest_cir_mark && peak == 0) {
+			PMD_DRV_LOG(ERR, "Max bandwidth must be configured for TC%u",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (!lowest_cir_mark && committed)
+			lowest_cir_mark = true;
+
+		if (committed && committed + rest_peak > port_bw) {
+			PMD_DRV_LOG(ERR, "Total value of TC%u min bandwidth and other TCs' max bandwidth %ukbps should be less than port link speed %ukbps",
+				cfg->tc_num, committed + rest_peak, port_bw);
+			return -EINVAL;
+		}
+
+		if (committed && committed < ICE_SCHED_MIN_BW) {
+			PMD_DRV_LOG(ERR, "If TC%u min Tx bandwidth is set, it cannot be less than 500Kbps",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (peak && committed > peak) {
+			PMD_DRV_LOG(ERR, "TC%u Min Tx bandwidth cannot be greater than max Tx bandwidth",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (peak > port_bw) {
+			PMD_DRV_LOG(ERR, "TC%u max Tx bandwidth %uKbps is greater than current link speed %uKbps",
+				cfg->tc_num, peak, port_bw);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct virtchnl_dcf_bw_cfg_list *vf_bw;
+	struct virtchnl_dcf_bw_cfg_list *tc_bw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct rte_tm_shaper_params *profile;
+	struct ice_dcf_tm_node *tm_node;
+	uint32_t port_bw, cir_total;
+	uint16_t size, vf_id;
+	uint8_t num_elem = 0;
+	int i, ret_val = ICE_SUCCESS;
+
+	/* check if all TC nodes are set */
+	if (BIT(hw->tm_conf.nb_tc_node) & hw->ets_config->tc_valid_bits) {
+		PMD_DRV_LOG(ERR, "Not all enabled TC nodes are set");
+		ret_val = ICE_ERR_PARAM;
+		goto fail_clear;
+	}
+
+	size = sizeof(struct virtchnl_dcf_bw_cfg_list) +
+		sizeof(struct virtchnl_dcf_bw_cfg) *
+		(hw->tm_conf.nb_tc_node - 1);
+	vf_bw = rte_zmalloc("vf_bw", size, 0);
+	if (!vf_bw) {
+		ret_val = ICE_ERR_NO_MEMORY;
+		goto fail_clear;
+	}
+	tc_bw = rte_zmalloc("tc_bw", size, 0);
+	if (!tc_bw) {
+		ret_val = ICE_ERR_NO_MEMORY;
+		goto fail_clear;
+	}
+
+	/* port bandwidth (Kbps) */
+	port_bw = hw->link_speed * 1000;
+	cir_total = 0;
+
+	/* init tc bw configuration */
+#define ICE_DCF_SCHED_TC_NODE 0xffff
+	tc_bw->vf_id = ICE_DCF_SCHED_TC_NODE;
+	tc_bw->node_type = VIRTCHNL_DCF_TARGET_TC_BW;
+	tc_bw->num_elem = hw->tm_conf.nb_tc_node;
+	for (i = 0; i < tc_bw->num_elem; i++) {
+		tc_bw->cfg[i].tc_num = i;
+		tc_bw->cfg[i].type = VIRTCHNL_BW_SHAPER;
+		tc_bw->cfg[i].bw_type |=
+			VIRTCHNL_DCF_BW_PIR | VIRTCHNL_DCF_BW_CIR;
+	}
+
+	for (vf_id = 0; vf_id < hw->num_vfs; vf_id++) {
+		num_elem = 0;
+		vf_bw->vf_id = vf_id;
+		vf_bw->node_type = VIRTCHNL_DCF_TARGET_VF_BW;
+		TAILQ_FOREACH(tm_node, vsi_list, node) {
+			/* scan the nodes belonging to one VSI */
+			if (tm_node->id - hw->num_vfs * tm_node->tc != vf_id)
+				continue;
+			vf_bw->cfg[num_elem].tc_num = tm_node->tc;
+			vf_bw->cfg[num_elem].type = VIRTCHNL_BW_SHAPER;
+			if (tm_node->shaper_profile) {
+				/* Convert from bytes per second to Kbps */
+				profile = &tm_node->shaper_profile->profile;
+				vf_bw->cfg[num_elem].shaper.peak =
+				profile->peak.rate / 1000 * BITS_PER_BYTE;
+				vf_bw->cfg[num_elem].shaper.committed =
+				profile->committed.rate / 1000 * BITS_PER_BYTE;
+				vf_bw->cfg[num_elem].bw_type |=
+					VIRTCHNL_DCF_BW_PIR |
+					VIRTCHNL_DCF_BW_CIR;
+			}
+
+			/* update tc node bw configuration */
+			tc_bw->cfg[tm_node->tc].shaper.peak +=
+				vf_bw->cfg[num_elem].shaper.peak;
+			tc_bw->cfg[tm_node->tc].shaper.committed +=
+				vf_bw->cfg[num_elem].shaper.committed;
+
+			cir_total += vf_bw->cfg[num_elem].shaper.committed;
+			num_elem++;
+		}
+
+		/* check that this VF has a VSI node under every TC node */
+		if (num_elem != hw->tm_conf.nb_tc_node) {
+			PMD_DRV_LOG(ERR, "VF%u VSI nodes are not set under all TC nodes",
+				    vf_id);
+			ret_val = ICE_ERR_PARAM;
+			goto fail_clear;
+		}
+
+		vf_bw->num_elem = num_elem;
+		ret_val = ice_dcf_set_vf_bw(hw, vf_bw, size);
+		if (ret_val)
+			goto fail_clear;
+		memset(vf_bw, 0, size);
+	}
+
+	/* check if total CIR is larger than port bandwidth */
+	if (cir_total > port_bw) {
+		PMD_DRV_LOG(ERR, "Total CIR of all VFs is larger than port bandwidth");
+		ret_val = ICE_ERR_PARAM;
+		goto fail_clear;
+	}
+
+	/* check and commit tc node bw configuration */
+	ret_val = ice_dcf_validate_tc_bw(tc_bw, port_bw);
+	if (ret_val)
+		goto fail_clear;
+	ret_val = ice_dcf_set_vf_bw(hw, tc_bw, size);
+	if (ret_val)
+		goto fail_clear;
+
+	hw->tm_conf.committed = true;
+	return ret_val;
+
+fail_clear:
+	/* clear all the traffic manager configuration */
+	if (clear_on_fail) {
+		ice_dcf_tm_conf_uninit(dev);
+		ice_dcf_tm_conf_init(dev);
+	}
+	return ret_val;
+}
diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build
index 65750d3501..0b86d74a49 100644
--- a/drivers/net/ice/meson.build
+++ b/drivers/net/ice/meson.build
@@ -70,6 +70,7 @@ endif
 sources += files('ice_dcf.c',
          'ice_dcf_vf_representor.c',
          'ice_dcf_ethdev.c',
-         'ice_dcf_parent.c')
+         'ice_dcf_parent.c',
+         'ice_dcf_sched.c')
 
 headers = files('rte_pmd_ice.h')
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 5/7] net/iavf: query QoS cap and set queue TC mapping
  2021-06-30  6:53 ` [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (3 preceding siblings ...)
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 4/7] net/ice: support QoS config VF bandwidth in DCF Ting Xu
@ 2021-06-30  6:53   ` Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 6/7] net/iavf: check Tx packet with correct UP and queue Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 7/7] doc: release note for ETS-based Tx QoS Ting Xu
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-30  6:53 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch adds support for the VF to configure the ETS-based Tx QoS,
including querying the current QoS configuration from the PF and
configuring the queue-to-TC mapping. PF QoS is configured in advance,
and the queried info is provided to the user for later use. VF queues
are mapped to different TCs in the PF through virtchnl.
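
As a usage sketch (not part of this patch), an application on the VF
can build the port/TC/queue tree through the generic rte_tm API. The
assumptions here: nb_txq divides evenly among num_tc, num_tc matches
the TC number reported by the PF, and queue (leaf) IDs are the queue
IDs themselves, kept below nb_txq, with port and TC IDs above that
range. Note that this patch rejects any nonzero shaper_profile_id on
the VF side, so the params below leave it at 0:

    #include <string.h>
    #include <rte_tm.h>

    static int
    vf_map_queues_to_tcs(uint16_t port_id, uint32_t num_tc, uint32_t nb_txq)
    {
            uint32_t root = nb_txq, q = 0, tc;
            struct rte_tm_node_params np, lp;
            struct rte_tm_error err;

            /* non-leaf params: single SP priority, no WFQ, no shaper */
            memset(&np, 0, sizeof(np));
            np.nonleaf.n_sp_priorities = 1;

            /* leaf params: no congestion management, no WRED */
            memset(&lp, 0, sizeof(lp));
            lp.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;

            /* VF VSI (root) node, level 0 */
            if (rte_tm_node_add(port_id, root, RTE_TM_NODE_ID_NULL,
                                0, 1, 0, &np, &err))
                    return -1;

            /* TC nodes at level 1, then a contiguous share of queues
             * per TC at level 2; the PMD requires the queues of one TC
             * to be contiguous and all allocated queues to be mapped */
            for (tc = 0; tc < num_tc; tc++) {
                    uint32_t n = nb_txq / num_tc;

                    if (rte_tm_node_add(port_id, root + 1 + tc, root,
                                        0, 1, 1, &np, &err))
                            return -1;
                    while (n-- && q < nb_txq)
                            if (rte_tm_node_add(port_id, q++,
                                                root + 1 + tc, 0, 1, 2,
                                                &lp, &err))
                                    return -1;
            }

            return rte_tm_hierarchy_commit(port_id, 1, &err);
    }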

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/iavf/iavf.h        |  46 +++
 drivers/net/iavf/iavf_ethdev.c |  34 ++
 drivers/net/iavf/iavf_tm.c     | 714 +++++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c  |  56 ++-
 drivers/net/iavf/meson.build   |   1 +
 5 files changed, 850 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 4f5811ae87..feb8337b55 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -6,6 +6,8 @@
 #define _IAVF_ETHDEV_H_
 
 #include <rte_kvargs.h>
+#include <rte_tm_driver.h>
+
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
 #include <iavf_type.h>
@@ -82,6 +84,8 @@
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
 
+#define IAVF_BITS_PER_BYTE 8
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -129,6 +133,38 @@ enum iavf_aq_result {
 	IAVF_MSG_CMD,      /* Read async command result */
 };
 
+/* Struct to store Traffic Manager node configuration. */
+struct iavf_tm_node {
+	TAILQ_ENTRY(iavf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct iavf_tm_node *parent;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(iavf_tm_node_list, iavf_tm_node);
+
+/* node type of Traffic Manager */
+enum iavf_tm_node_type {
+	IAVF_TM_NODE_TYPE_PORT,
+	IAVF_TM_NODE_TYPE_TC,
+	IAVF_TM_NODE_TYPE_QUEUE,
+	IAVF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct iavf_tm_conf {
+	struct iavf_tm_node *root; /* root node - vf vsi */
+	struct iavf_tm_node_list tc_list; /* node list for all the TCs */
+	struct iavf_tm_node_list queue_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_queue_node;
+	bool committed;
+};
+
 /* Structure to store private data specific for VF instance. */
 struct iavf_info {
 	uint16_t num_queue_pairs;
@@ -175,6 +211,9 @@ struct iavf_info {
 	struct iavf_fdir_info fdir; /* flow director info */
 	/* indicate large VF support enabled or not */
 	bool lv_enabled;
+
+	struct virtchnl_qos_cap_list *qos_cap;
+	struct iavf_tm_conf tm_conf;
 };
 
 #define IAVF_MAX_PKT_TYPE 1024
@@ -344,4 +383,11 @@ int iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			uint32_t mc_addrs_num, bool add);
 int iavf_request_queues(struct iavf_adapter *adapter, uint16_t num);
 int iavf_get_max_rss_queue_region(struct iavf_adapter *adapter);
+int iavf_get_qos_cap(struct iavf_adapter *adapter);
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+			struct virtchnl_queue_tc_mapping *q_tc_mapping,
+			uint16_t size);
+void iavf_tm_conf_init(struct rte_eth_dev *dev);
+void iavf_tm_conf_uninit(struct rte_eth_dev *dev);
+extern const struct rte_tm_ops iavf_tm_ops;
 #endif /* _IAVF_ETHDEV_H_ */
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 5290588b17..93f82773e2 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -122,6 +122,7 @@ static int iavf_dev_flow_ops_get(struct rte_eth_dev *dev,
 static int iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
 			uint32_t mc_addrs_num);
+static int iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg);
 
 static const struct rte_pci_id pci_id_iavf_map[] = {
 	{ RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_ADAPTIVE_VF) },
@@ -200,8 +201,21 @@ static const struct eth_dev_ops iavf_eth_dev_ops = {
 	.flow_ops_get               = iavf_dev_flow_ops_get,
 	.tx_done_cleanup	    = iavf_dev_tx_done_cleanup,
 	.get_monitor_addr           = iavf_get_monitor_addr,
+	.tm_ops_get                 = iavf_tm_ops_get,
 };
 
+static int
+iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+			void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &iavf_tm_ops;
+
+	return 0;
+}
+
 static int
 iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
@@ -806,6 +820,11 @@ iavf_dev_start(struct rte_eth_dev *dev)
 				      dev->data->nb_tx_queues);
 	num_queue_pairs = vf->num_queue_pairs;
 
+	if (iavf_get_qos_cap(adapter)) {
+		PMD_INIT_LOG(ERR, "Failed to get qos capability");
+		return -1;
+	}
+
 	if (iavf_init_queues(dev) != 0) {
 		PMD_DRV_LOG(ERR, "failed to do Queue init");
 		return -1;
@@ -891,6 +910,7 @@ iavf_dev_stop(struct rte_eth_dev *dev)
 
 	adapter->stopped = 1;
 	dev->data->dev_started = 0;
+	vf->tm_conf.committed = false;
 
 	return 0;
 }
@@ -2090,6 +2110,15 @@ iavf_init_vf(struct rte_eth_dev *dev)
 		PMD_INIT_LOG(ERR, "unable to allocate vf_res memory");
 		goto err_api;
 	}
+
+	bufsz = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+	vf->qos_cap = rte_zmalloc("qos_cap", bufsz, 0);
+	if (!vf->qos_cap) {
+		PMD_INIT_LOG(ERR, "unable to allocate qos_cap memory");
+		goto err_api;
+	}
+
 	if (iavf_get_vf_resource(adapter) != 0) {
 		PMD_INIT_LOG(ERR, "iavf_get_vf_config failed");
 		goto err_alloc;
@@ -2131,6 +2160,7 @@ iavf_init_vf(struct rte_eth_dev *dev)
 	rte_free(vf->rss_key);
 	rte_free(vf->rss_lut);
 err_alloc:
+	rte_free(vf->qos_cap);
 	rte_free(vf->vf_res);
 	vf->vsi_res = NULL;
 err_api:
@@ -2299,6 +2329,8 @@ iavf_dev_init(struct rte_eth_dev *eth_dev)
 
 	iavf_default_rss_disable(adapter);
 
+	iavf_tm_conf_init(eth_dev);
+
 	return 0;
 }
 
@@ -2338,6 +2370,8 @@ iavf_dev_close(struct rte_eth_dev *dev)
 				     iavf_dev_interrupt_handler, dev);
 	iavf_disable_irq0(hw);
 
+	iavf_tm_conf_uninit(dev);
+
 	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
 		if (vf->rss_lut) {
 			rte_free(vf->rss_lut);
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
new file mode 100644
index 0000000000..a8fc142c89
--- /dev/null
+++ b/drivers/net/iavf/iavf_tm.c
@@ -0,0 +1,714 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "iavf.h"
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error);
+static int iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error);
+static int iavf_node_capabilities_get(struct rte_eth_dev *dev,
+				      uint32_t node_id,
+				      struct rte_tm_node_capabilities *cap,
+				      struct rte_tm_error *error);
+static int iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error);
+
+const struct rte_tm_ops iavf_tm_ops = {
+	.node_add = iavf_tm_node_add,
+	.node_delete = iavf_tm_node_delete,
+	.capabilities_get = iavf_tm_capabilities_get,
+	.level_capabilities_get = iavf_level_capabilities_get,
+	.node_capabilities_get = iavf_node_capabilities_get,
+	.node_type_get = iavf_node_type_get,
+	.hierarchy_commit = iavf_hierarchy_commit,
+};
+
+void
+iavf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	/* initialize node configuration */
+	vf->tm_conf.root = NULL;
+	TAILQ_INIT(&vf->tm_conf.tc_list);
+	TAILQ_INIT(&vf->tm_conf.queue_list);
+	vf->tm_conf.nb_tc_node = 0;
+	vf->tm_conf.nb_queue_node = 0;
+	vf->tm_conf.committed = false;
+}
+
+void
+iavf_tm_conf_uninit(struct rte_eth_dev *dev)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_node *tm_node;
+
+	/* clear node configuration */
+	while ((tm_node = TAILQ_FIRST(&vf->tm_conf.queue_list))) {
+		TAILQ_REMOVE(&vf->tm_conf.queue_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	vf->tm_conf.nb_queue_node = 0;
+	while ((tm_node = TAILQ_FIRST(&vf->tm_conf.tc_list))) {
+		TAILQ_REMOVE(&vf->tm_conf.tc_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	vf->tm_conf.nb_tc_node = 0;
+	if (vf->tm_conf.root) {
+		rte_free(vf->tm_conf.root);
+		vf->tm_conf.root = NULL;
+	}
+}
+
+static inline struct iavf_tm_node *
+iavf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum iavf_tm_node_type *node_type)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_node_list *tc_list = &vf->tm_conf.tc_list;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+
+	if (vf->tm_conf.root && vf->tm_conf.root->id == node_id) {
+		*node_type = IAVF_TM_NODE_TYPE_PORT;
+		return vf->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_QUEUE;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static int
+iavf_node_param_check(struct iavf_info *vf, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* shaper profile is not supported */
+	if (params->shaper_profile_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+		error->message = "shaper profile not supported";
+		return -EINVAL;
+	}
+
+	/* shared shaper is not supported */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= vf->num_queue_pairs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error)
+{
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!is_leaf || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type == IAVF_TM_NODE_TYPE_QUEUE)
+		*is_leaf = true;
+	else
+		*is_leaf = false;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	enum iavf_tm_node_type parent_node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+	struct iavf_tm_node *parent_node;
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+	int ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = iavf_node_param_check(vf, node_id, priority, weight,
+				    params, error);
+	if (ret)
+		return ret;
+
+	/* check if the node already exists */
+	if (iavf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* root node if it does not have a parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != IAVF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (vf->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("iavf_tm_node",
+				      sizeof(struct iavf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		vf->tm_conf.root = tm_node;
+		return 0;
+	}
+
+	/* TC or queue node */
+	/* check the parent node */
+	parent_node = iavf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent does not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != IAVF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not root or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the node number */
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (vf->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the queue number */
+		if (parent_node->reference_count >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many queues";
+			return -EINVAL;
+		}
+		if (node_id >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large queue id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or queue node */
+	tm_node = rte_zmalloc("iavf_tm_node",
+			      sizeof(struct iavf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = vf->tm_conf.nb_tc_node;
+		vf->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.queue_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		vf->tm_conf.nb_queue_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == IAVF_TM_NODE_TYPE_PORT) {
+		rte_free(tm_node);
+		vf->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or queue node */
+	tm_node->parent->reference_count--;
+	if (node_type == IAVF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&vf->tm_conf.tc_list, tm_node, node);
+		vf->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&vf->tm_conf.queue_list, tm_node, node);
+		vf->tm_conf.nb_queue_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (tc_nb > vf->vf_res->num_queue_pairs)
+		return -EINVAL;
+
+	error->type = RTE_TM_ERROR_TYPE_NONE;
+
+	/* set all the parameters to 0 first. */
+	memset(cap, 0, sizeof(struct rte_tm_capabilities));
+
+	/**
+	 * support port + TCs + queues
+	 * this shows the max capability, not the current configuration.
+	 */
+	cap->n_nodes_max = 1 + IAVF_MAX_TRAFFIC_CLASS
+		+ vf->num_queue_pairs;
+	cap->n_levels_max = 3; /* port, TC, queue */
+	cap->non_leaf_nodes_identical = 1;
+	cap->leaf_nodes_identical = 1;
+	cap->shaper_n_max = cap->n_nodes_max;
+	cap->shaper_private_n_max = cap->n_nodes_max;
+	cap->shaper_private_dual_rate_n_max = 0;
+	cap->shaper_private_rate_min = 0;
+	/* GBps */
+	cap->shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->shaper_private_packet_mode_supported = 0;
+	cap->shaper_private_byte_mode_supported = 1;
+	cap->shaper_shared_n_max = 0;
+	cap->shaper_shared_n_nodes_per_shaper_max = 0;
+	cap->shaper_shared_n_shapers_per_node_max = 0;
+	cap->shaper_shared_dual_rate_n_max = 0;
+	cap->shaper_shared_rate_min = 0;
+	cap->shaper_shared_rate_max = 0;
+	cap->shaper_shared_packet_mode_supported = 0;
+	cap->shaper_shared_byte_mode_supported = 0;
+	cap->sched_n_children_max = vf->num_queue_pairs;
+	cap->sched_sp_n_priorities_max = 1;
+	cap->sched_wfq_n_children_per_group_max = 0;
+	cap->sched_wfq_n_groups_max = 0;
+	cap->sched_wfq_weight_max = 1;
+	cap->sched_wfq_packet_mode_supported = 0;
+	cap->sched_wfq_byte_mode_supported = 0;
+	cap->cman_head_drop_supported = 0;
+	cap->dynamic_update_mask = 0;
+	cap->shaper_pkt_length_adjust_min = RTE_TM_ETH_FRAMING_OVERHEAD;
+	cap->shaper_pkt_length_adjust_max = RTE_TM_ETH_FRAMING_OVERHEAD_FCS;
+	cap->cman_wred_context_n_max = 0;
+	cap->cman_wred_context_private_n_max = 0;
+	cap->cman_wred_context_shared_n_max = 0;
+	cap->cman_wred_context_shared_n_nodes_per_context_max = 0;
+	cap->cman_wred_context_shared_n_contexts_per_node_max = 0;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (level_id >= IAVF_TM_NODE_TYPE_MAX) {
+		error->type = RTE_TM_ERROR_TYPE_LEVEL_ID;
+		error->message = "too deep level";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (level_id == IAVF_TM_NODE_TYPE_PORT) {
+		cap->n_nodes_max = 1;
+		cap->n_nodes_nonleaf_max = 1;
+		cap->n_nodes_leaf_max = 0;
+	} else if (level_id == IAVF_TM_NODE_TYPE_TC) {
+		/* TC */
+		cap->n_nodes_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_nonleaf_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_leaf_max = 0;
+	} else {
+		/* queue */
+		cap->n_nodes_max = vf->num_queue_pairs;
+		cap->n_nodes_nonleaf_max = 0;
+		cap->n_nodes_leaf_max = vf->num_queue_pairs;
+	}
+
+	cap->non_leaf_nodes_identical = true;
+	cap->leaf_nodes_identical = true;
+
+	if (level_id != IAVF_TM_NODE_TYPE_QUEUE) {
+		cap->nonleaf.shaper_private_supported = true;
+		cap->nonleaf.shaper_private_dual_rate_supported = false;
+		cap->nonleaf.shaper_private_rate_min = 0;
+		/* GBps */
+		cap->nonleaf.shaper_private_rate_max =
+			vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+		cap->nonleaf.shaper_private_packet_mode_supported = 0;
+		cap->nonleaf.shaper_private_byte_mode_supported = 1;
+		cap->nonleaf.shaper_shared_n_max = 0;
+		cap->nonleaf.shaper_shared_packet_mode_supported = 0;
+		cap->nonleaf.shaper_shared_byte_mode_supported = 0;
+		if (level_id == IAVF_TM_NODE_TYPE_PORT)
+			cap->nonleaf.sched_n_children_max =
+				IAVF_MAX_TRAFFIC_CLASS;
+		else
+			cap->nonleaf.sched_n_children_max =
+				vf->num_queue_pairs;
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
+		cap->nonleaf.sched_wfq_n_groups_max = 0;
+		cap->nonleaf.sched_wfq_weight_max = 1;
+		cap->nonleaf.sched_wfq_packet_mode_supported = 0;
+		cap->nonleaf.sched_wfq_byte_mode_supported = 0;
+		cap->nonleaf.stats_mask = 0;
+
+		return 0;
+	}
+
+	/* queue node */
+	cap->leaf.shaper_private_supported = false;
+	cap->leaf.shaper_private_dual_rate_supported = false;
+	cap->leaf.shaper_private_rate_min = 0;
+	/* GBps */
+	cap->leaf.shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->leaf.shaper_private_packet_mode_supported = 0;
+	cap->leaf.shaper_private_byte_mode_supported = 1;
+	cap->leaf.shaper_shared_n_max = 0;
+	cap->leaf.shaper_shared_packet_mode_supported = 0;
+	cap->leaf.shaper_shared_byte_mode_supported = 0;
+	cap->leaf.cman_head_drop_supported = false;
+	cap->leaf.cman_wred_context_private_supported = true;
+	cap->leaf.cman_wred_context_shared_n_max = 0;
+	cap->leaf.stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_node_capabilities_get(struct rte_eth_dev *dev,
+			   uint32_t node_id,
+			   struct rte_tm_node_capabilities *cap,
+			   struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type;
+	struct virtchnl_qos_cap_elem tc_cap;
+	struct iavf_tm_node *tm_node;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "capability get not supported";
+		return -EINVAL;
+	}
+
+	tc_cap = vf->qos_cap->cap[tm_node->tc];
+	if (tc_cap.tc_num != tm_node->tc) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "TC does not match";
+		return -EINVAL;
+	}
+
+	cap->shaper_private_supported = true;
+	cap->shaper_private_dual_rate_supported = false;
+	cap->shaper_private_rate_min = tc_cap.shaper.committed;
+	cap->shaper_private_rate_max = tc_cap.shaper.peak;
+	cap->shaper_shared_n_max = 0;
+	cap->nonleaf.sched_n_children_max = vf->num_queue_pairs;
+	cap->nonleaf.sched_sp_n_priorities_max = 1;
+	cap->nonleaf.sched_wfq_n_children_per_group_max = 1;
+	cap->nonleaf.sched_wfq_n_groups_max = 0;
+	cap->nonleaf.sched_wfq_weight_max = tc_cap.weight;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct virtchnl_queue_tc_mapping *q_tc_mapping;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+	uint16_t size;
+	int index = 0, node_committed = 0;
+	int i, ret_val = IAVF_SUCCESS;
+
+	/* check if all TC nodes are set with VF vsi */
+	if (vf->tm_conf.nb_tc_node != vf->qos_cap->num_elem) {
+		PMD_DRV_LOG(ERR, "VF vsi nodes are not set for all TCs");
+		ret_val = IAVF_ERR_PARAM;
+		goto fail_clear;
+	}
+
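+	/* the struct already carries one tc[] element, so only
+	 * num_elem - 1 extra elements need to be allocated
+	 */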
+	size = sizeof(*q_tc_mapping) + sizeof(q_tc_mapping->tc[0]) *
+		(vf->qos_cap->num_elem - 1);
+	q_tc_mapping = rte_zmalloc("q_tc", size, 0);
+	if (!q_tc_mapping) {
+		ret_val = IAVF_ERR_NO_MEMORY;
+		goto fail_clear;
+	}
+
+	q_tc_mapping->vsi_id = vf->vsi.vsi_id;
+	q_tc_mapping->num_tc = vf->qos_cap->num_elem;
+	q_tc_mapping->num_queue_pairs = vf->num_queue_pairs;
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		if (tm_node->tc >= q_tc_mapping->num_tc) {
+			PMD_DRV_LOG(ERR, "TC%d is not enabled", tm_node->tc);
+			ret_val = IAVF_ERR_PARAM;
+			goto fail_clear;
+		}
+		q_tc_mapping->tc[tm_node->tc].req.queue_count++;
+		node_committed++;
+	}
+
+	/* All queues allocated to this VF should be mapped */
+	if (node_committed < vf->num_queue_pairs) {
+		PMD_DRV_LOG(ERR, "fewer queue nodes than allocated queue pairs");
+		ret_val = IAVF_ERR_PARAM;
+		goto fail_clear;
+	}
+
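+	/* queues of one TC occupy one contiguous id range; lay the TC
+	 * ranges out back to back starting from queue 0
+	 */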
+	for (i = 0; i < q_tc_mapping->num_tc; i++) {
+		q_tc_mapping->tc[i].req.start_queue_id = index;
+		index += q_tc_mapping->tc[i].req.queue_count;
+	}
+
+	ret_val = iavf_set_q_tc_map(dev, q_tc_mapping, size);
+	if (ret_val)
+		goto fail_clear;
+
+	vf->tm_conf.committed = true;
+	return ret_val;
+
+fail_clear:
+	/* clear all the traffic manager configuration */
+	if (clear_on_fail) {
+		iavf_tm_conf_uninit(dev);
+		iavf_tm_conf_init(dev);
+	}
+	return ret_val;
+}
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index 02e828f9b7..06dc663947 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -467,7 +467,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 		VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 		VIRTCHNL_VF_OFFLOAD_CRC |
 		VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-		VIRTCHNL_VF_LARGE_NUM_QPAIRS;
+		VIRTCHNL_VF_LARGE_NUM_QPAIRS |
+		VIRTCHNL_VF_OFFLOAD_QOS;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -1550,6 +1551,59 @@ iavf_set_hena(struct iavf_adapter *adapter, uint64_t hena)
 	return err;
 }
 
+int
+iavf_get_qos_cap(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	uint32_t len;
+	int err;
+
+	args.ops = VIRTCHNL_OP_GET_QOS_CAPS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+	err = iavf_execute_vf_cmd(adapter, &args);
+
+	if (err) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of VIRTCHNL_OP_GET_QOS_CAPS");
+		return -1;
+	}
+
+	len = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+
+	rte_memcpy(vf->qos_cap, args.out_buffer,
+		   RTE_MIN(args.out_size, len));
+
+	return 0;
+}
+
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+		struct virtchnl_queue_tc_mapping *q_tc_mapping, uint16_t size)
+{
+	struct iavf_adapter *adapter =
+			IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_cmd_info args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.ops = VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP;
+	args.in_args = (uint8_t *)q_tc_mapping;
+	args.in_args_size = size;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	err = iavf_execute_vf_cmd(adapter, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "Failed to execute command of"
+			    " VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP");
+	return err;
+}
+
 int
 iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			struct rte_ether_addr *mc_addrs,
diff --git a/drivers/net/iavf/meson.build b/drivers/net/iavf/meson.build
index 6f222a9e87..f2010a8337 100644
--- a/drivers/net/iavf/meson.build
+++ b/drivers/net/iavf/meson.build
@@ -19,6 +19,7 @@ sources = files(
         'iavf_generic_flow.c',
         'iavf_fdir.c',
         'iavf_hash.c',
+        'iavf_tm.c',
 )
 
 if arch_subdir == 'x86'
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 6/7] net/iavf: check Tx packet with correct UP and queue
  2021-06-30  6:53 ` [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (4 preceding siblings ...)
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 5/7] net/iavf: query QoS cap and set queue TC mapping Ting Xu
@ 2021-06-30  6:53   ` Ting Xu
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 7/7] doc: release note for ETS-based Tx QoS Ting Xu
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-30  6:53 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

Add a check in the Tx packet preparation function to guarantee that a
packet with a specific user priority is transmitted on the correct Tx
queue, according to the configured Tx queue TC mapping.
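
The core of the check reduces to the standalone sketch below
(illustrative only, not part of the patch; it assumes the packet's
16-bit VLAN TCI and the user-priority bitmap the PF reported for the
queue's TC, i.e. tc_prio in the QoS caps):

#include <stdint.h>

/* Hypothetical helper mirroring the check: returns non-zero if the
 * packet's user priority is valid for the TC of the target queue.
 * The 3-bit PCP sits in bits 15:13 of the VLAN TCI.
 */
static int
up_allowed_on_tc(uint16_t vlan_tci, uint16_t tc_prio_bitmap)
{
	uint16_t up = vlan_tci >> 13; /* IAVF_VLAN_TAG_PCP_OFFSET */

	return (tc_prio_bitmap & (1u << up)) != 0;
}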

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/iavf/iavf.h      | 10 +++++++++
 drivers/net/iavf/iavf_rxtx.c | 43 ++++++++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_tm.c   | 13 +++++++++++
 3 files changed, 66 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index feb8337b55..b3bd078111 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -86,6 +86,8 @@
 
 #define IAVF_BITS_PER_BYTE 8
 
+#define IAVF_VLAN_TAG_PCP_OFFSET 13
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -165,6 +167,13 @@ struct iavf_tm_conf {
 	bool committed;
 };
 
+/* Struct to store queue TC mapping. Queues are contiguous within one TC */
+struct iavf_qtc_map {
+	uint8_t	tc;
+	uint16_t start_queue_id;
+	uint16_t queue_count;
+};
+
 /* Structure to store private data specific for VF instance. */
 struct iavf_info {
 	uint16_t num_queue_pairs;
@@ -213,6 +222,7 @@ struct iavf_info {
 	bool lv_enabled;
 
 	struct virtchnl_qos_cap_list *qos_cap;
+	struct iavf_qtc_map *qtc_map;
 	struct iavf_tm_conf tm_conf;
 };
 
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 0361af0d85..eb6d83a165 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2342,14 +2342,49 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	return nb_tx;
 }
 
+/* Check if the packet with vlan user priority is transmitted in the
+ * correct queue.
+ */
+static int
+iavf_check_vlan_up2tc(struct iavf_tx_queue *txq, uint8_t tc, struct rte_mbuf *m)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	uint16_t up;
+
+	up = m->vlan_tci >> IAVF_VLAN_TAG_PCP_OFFSET;
+
+	if (!(vf->qos_cap->cap[tc].tc_prio & BIT(up))) {
+		PMD_TX_LOG(ERR, "packet with VLAN PCP %u cannot be transmitted on queue %u\n",
+			up, txq->queue_id);
+		return -1;
+	} else {
+		return 0;
+	}
+}
+
 /* TX prep functions */
 uint16_t
 iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 	      uint16_t nb_pkts)
 {
 	int i, ret;
+	uint8_t tc = 0;
 	uint64_t ol_flags;
 	struct rte_mbuf *m;
+	struct iavf_tx_queue *txq = tx_queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	if (vf->tm_conf.committed) {
+		for (i = 0; i < vf->qos_cap->num_elem; i++) {
+			if (txq->queue_id >= vf->qtc_map[i].start_queue_id &&
+				txq->queue_id < (vf->qtc_map[i].start_queue_id +
+				vf->qtc_map[i].queue_count))
+				break;
+		}
+		tc = i;
+	}
 
 	for (i = 0; i < nb_pkts; i++) {
 		m = tx_pkts[i];
@@ -2385,6 +2420,14 @@ iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 			rte_errno = -ret;
 			return i;
 		}
+
+		if (ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN)) {
+			ret = iavf_check_vlan_up2tc(txq, tc, m);
+			if (ret != 0) {
+				rte_errno = -ret;
+				return i;
+			}
+		}
 	}
 
 	return i;
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
index a8fc142c89..185b37b970 100644
--- a/drivers/net/iavf/iavf_tm.c
+++ b/drivers/net/iavf/iavf_tm.c
@@ -653,6 +653,7 @@ static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 	struct virtchnl_queue_tc_mapping *q_tc_mapping;
 	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
 	struct iavf_tm_node *tm_node;
+	struct iavf_qtc_map *qtc_map;
 	uint16_t size;
 	int index = 0, node_committed = 0;
 	int i, ret_val = IAVF_SUCCESS;
@@ -675,6 +676,7 @@ static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 	q_tc_mapping->vsi_id = vf->vsi.vsi_id;
 	q_tc_mapping->num_tc = vf->qos_cap->num_elem;
 	q_tc_mapping->num_queue_pairs = vf->num_queue_pairs;
+
 	TAILQ_FOREACH(tm_node, queue_list, node) {
 		if (tm_node->tc >= q_tc_mapping->num_tc) {
 			PMD_DRV_LOG(ERR, "TC%d is not enabled", tm_node->tc);
@@ -692,15 +694,26 @@ static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 		goto fail_clear;
 	}
 
+	/* store the queue TC mapping info */
+	qtc_map = rte_zmalloc("qtc_map",
+		sizeof(struct iavf_qtc_map) * q_tc_mapping->num_tc, 0);
+	if (!qtc_map)
+		return IAVF_ERR_NO_MEMORY;
+
 	for (i = 0; i < q_tc_mapping->num_tc; i++) {
 		q_tc_mapping->tc[i].req.start_queue_id = index;
 		index += q_tc_mapping->tc[i].req.queue_count;
+		qtc_map[i].tc = i;
+		qtc_map[i].start_queue_id =
+			q_tc_mapping->tc[i].req.start_queue_id;
+		qtc_map[i].queue_count = q_tc_mapping->tc[i].req.queue_count;
 	}
 
 	ret_val = iavf_set_q_tc_map(dev, q_tc_mapping, size);
 	if (ret_val)
 		goto fail_clear;
 
+	vf->qtc_map = qtc_map;
 	vf->tm_conf.committed = true;
 	return ret_val;
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v4 7/7] doc: release note for ETS-based Tx QoS
  2021-06-30  6:53 ` [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (5 preceding siblings ...)
  2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 6/7] net/iavf: check Tx packet with correct UP and queue Ting Xu
@ 2021-06-30  6:53   ` Ting Xu
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-06-30  6:53 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

Add the 21.08 release note entries for the ETS-based Tx QoS feature.

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 doc/guides/rel_notes/release_21_08.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index a6ecfdf3ce..35aa76a270 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -55,6 +55,13 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Updated Intel iavf driver.**
+
+  * Added Tx QoS VF queue TC mapping.
+
+* **Updated Intel ice driver.**
+
+  * Added Tx QoS TC bandwidth configuration in DCF.
 
 Removed Items
 -------------
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF
  2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                   ` (7 preceding siblings ...)
  2021-06-30  6:53 ` [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
@ 2021-07-01 10:20 ` Ting Xu
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
                     ` (7 more replies)
  2021-07-01 11:41 ` Ting Xu
  9 siblings, 8 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 10:20 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch set enables the ETS-based Tx QoS for IAVF. A kernel tool is
used to configure ETS first. DCF is used to set the bandwidth limit for
the VFs of each TC. IAVF can query the QoS capability and set the queue
TC mapping. The Traffic Management API is utilized to configure the QoS
hierarchy scheduler tree. The scheduler tree is passed to hardware to
enable all of the above functions.
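
From the application side, the hierarchy is built through the generic
rte_tm API roughly as in the following minimal sketch (illustrative
only, not part of the series; it assumes port 0 is the IAVF VF with
exactly four Tx queues and two enabled TCs, and node ids 100/200/201
are arbitrary values above the queue-id range):

#include <string.h>
#include <rte_tm.h>

static int
build_vf_tm_hierarchy(uint16_t port_id)
{
	struct rte_tm_node_params np;
	struct rte_tm_error err;
	uint32_t q;
	int ret;

	/* non-leaf params for the port (level 0) and TC (level 1) nodes */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
	np.nonleaf.n_sp_priorities = 1;

	ret = rte_tm_node_add(port_id, 100, RTE_TM_NODE_ID_NULL,
			      0, 1, 0, &np, &err);
	if (!ret)
		ret = rte_tm_node_add(port_id, 200, 100, 0, 1, 1, &np, &err);
	if (!ret)
		ret = rte_tm_node_add(port_id, 201, 100, 0, 1, 1, &np, &err);
	if (ret)
		return ret;

	/* leaf params for the queue (level 2) nodes: node id == queue id,
	 * and the queues of one TC must be contiguous
	 */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
	np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
	for (q = 0; q < 4 && !ret; q++)
		ret = rte_tm_node_add(port_id, q, q < 2 ? 200 : 201,
				      0, 1, 2, &np, &err);
	if (ret)
		return ret;

	/* pass the tree to hardware, clearing it again on failure */
	return rte_tm_hierarchy_commit(port_id, 1, &err);
}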

Ting Xu (7):
  common/iavf: support ETS-based QoS offload configuration
  net/ice/base: support DCF query port ETS adminq
  net/ice: support DCF link status event handling
  net/ice: support QoS config VF bandwidth in DCF
  net/iavf: query QoS cap and set queue TC mapping
  net/iavf: check Tx packet with correct UP and queue
  doc: release note for ETS-based Tx QoS

 doc/guides/rel_notes/release_21_08.rst |   7 +
 drivers/common/iavf/iavf_type.h        |   2 +
 drivers/common/iavf/virtchnl.h         | 131 +++++
 drivers/net/iavf/iavf.h                |  56 ++
 drivers/net/iavf/iavf_ethdev.c         |  34 ++
 drivers/net/iavf/iavf_rxtx.c           |  43 ++
 drivers/net/iavf/iavf_tm.c             | 737 ++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c          |  56 +-
 drivers/net/iavf/meson.build           |   1 +
 drivers/net/ice/base/ice_dcb.c         |   3 +-
 drivers/net/ice/ice_dcf.c              |   9 +-
 drivers/net/ice/ice_dcf.h              |  54 ++
 drivers/net/ice/ice_dcf_ethdev.c       |  68 ++-
 drivers/net/ice/ice_dcf_ethdev.h       |   3 +
 drivers/net/ice/ice_dcf_parent.c       |  81 +++
 drivers/net/ice/ice_dcf_sched.c        | 759 +++++++++++++++++++++++++
 drivers/net/ice/meson.build            |   3 +-
 17 files changed, 2040 insertions(+), 7 deletions(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 1/7] common/iavf: support ETS-based QoS offload configuration
  2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
@ 2021-07-01 10:20   ` Ting Xu
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 2/7] net/ice/base: support DCF query port ETS adminq Ting Xu
                     ` (6 subsequent siblings)
  7 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 10:20 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch adds new virtchnl opcodes and structures for QoS
configuration, which include:
1. VIRTCHNL_VF_OFFLOAD_QOS, to negotiate the capability supporting QoS
configuration. If both the VF and the PF have this flag, the ETS-based
QoS offload function is supported.
2. VIRTCHNL_OP_DCF_CONFIG_BW, with which DCF configures the min and max
bandwidth for each VF per enabled TC. To make the VSI node bandwidth
configuration work, DCF also needs to configure the TC node bandwidth
directly.
3. VIRTCHNL_OP_GET_QOS_CAPS, with which a VF queries the current QoS
configuration, such as enabled TCs, arbiter type, up2tc and VSI node
bandwidth. The configuration was previously set by DCB and DCF, and is
now the potential QoS capability of the VF. The VF can take it as a
reference to configure the queue TC mapping.
4. VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP, to set the VF queue to TC mapping
for all Tx and Rx queues. Queues mapping to one TC should be contiguous
and all allocated queues should be mapped (see the sizing sketch after
this list).
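
Since virtchnl_queue_tc_mapping ends in a one-element flexible array,
the sender sizes the message from num_tc. A minimal sketch of that
arithmetic (hypothetical helper, matching the validation rule added
below; assumes num_tc >= 1 and the virtchnl.h definitions in scope):

#include <stdlib.h>

/* hypothetical allocator for the variable-length message: the struct
 * already contains tc[1], so only num_tc - 1 extra elements are added
 */
static struct virtchnl_queue_tc_mapping *
alloc_q_tc_map(uint16_t num_tc)
{
	struct virtchnl_queue_tc_mapping *map;
	size_t len;

	len = sizeof(*map) + (size_t)(num_tc - 1) * sizeof(map->tc[0]);
	map = calloc(1, len);
	if (map)
		map->num_tc = num_tc;
	return map;
}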

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/common/iavf/iavf_type.h |   2 +
 drivers/common/iavf/virtchnl.h  | 131 ++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)

diff --git a/drivers/common/iavf/iavf_type.h b/drivers/common/iavf/iavf_type.h
index f3815d523b..73dfb47e70 100644
--- a/drivers/common/iavf/iavf_type.h
+++ b/drivers/common/iavf/iavf_type.h
@@ -141,6 +141,8 @@ enum iavf_debug_mask {
 #define IAVF_PHY_LED_MODE_MASK			0xFFFF
 #define IAVF_PHY_LED_MODE_ORIG			0x80000000
 
+#define IAVF_MAX_TRAFFIC_CLASS	8
+
 /* Memory types */
 enum iavf_memset_type {
 	IAVF_NONDMA_MEM = 0,
diff --git a/drivers/common/iavf/virtchnl.h b/drivers/common/iavf/virtchnl.h
index 197edce8a1..1cf0866124 100644
--- a/drivers/common/iavf/virtchnl.h
+++ b/drivers/common/iavf/virtchnl.h
@@ -85,6 +85,10 @@ enum virtchnl_rx_hsplit {
 	VIRTCHNL_RX_HSPLIT_SPLIT_SCTP    = 8,
 };
 
+enum virtchnl_bw_limit_type {
+	VIRTCHNL_BW_SHAPER = 0,
+};
+
 #define VIRTCHNL_ETH_LENGTH_OF_ADDRESS	6
 /* END GENERIC DEFINES */
 
@@ -130,6 +134,7 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
 	VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
 	/* opcodes 34, 35, 36, and 37 are reserved */
+	VIRTCHNL_OP_DCF_CONFIG_BW = 37,
 	VIRTCHNL_OP_DCF_VLAN_OFFLOAD = 38,
 	VIRTCHNL_OP_DCF_CMD_DESC = 39,
 	VIRTCHNL_OP_DCF_CMD_BUFF = 40,
@@ -152,6 +157,8 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57,
 	VIRTCHNL_OP_ENABLE_VLAN_FILTERING_V2 = 58,
 	VIRTCHNL_OP_DISABLE_VLAN_FILTERING_V2 = 59,
+	VIRTCHNL_OP_GET_QOS_CAPS = 66,
+	VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP = 67,
 	VIRTCHNL_OP_ENABLE_QUEUES_V2 = 107,
 	VIRTCHNL_OP_DISABLE_QUEUES_V2 = 108,
 	VIRTCHNL_OP_MAP_QUEUE_VECTOR = 111,
@@ -398,6 +405,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC	BIT(26)
 #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF		BIT(27)
 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF		BIT(28)
+#define VIRTCHNL_VF_OFFLOAD_QOS		BIT(29)
 #define VIRTCHNL_VF_CAP_DCF			BIT(30)
 	/* BIT(31) is reserved */
 
@@ -1285,6 +1293,14 @@ struct virtchnl_filter {
 
 VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
 
+struct virtchnl_shaper_bw {
+	/* Unit is Kbps */
+	u32 committed;
+	u32 peak;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw);
+
 /* VIRTCHNL_OP_DCF_GET_VSI_MAP
  * VF sends this message to get VSI mapping table.
  * PF responds with an indirect message containing VF's
@@ -1357,6 +1373,37 @@ struct virtchnl_dcf_vlan_offload {
 
 VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_dcf_vlan_offload);
 
+struct virtchnl_dcf_bw_cfg {
+	u8 tc_num;
+#define VIRTCHNL_DCF_BW_CIR		BIT(0)
+#define VIRTCHNL_DCF_BW_PIR		BIT(1)
+	u8 bw_type;
+	u8 pad[2];
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_dcf_bw_cfg);
+
+/* VIRTCHNL_OP_DCF_CONFIG_BW
+ * VF send this message to set the bandwidth configuration of each
+ * TC with a specific vf id. The flag node_type is to indicate that
+ * this message is to configure VSI node or TC node bandwidth.
+ */
+struct virtchnl_dcf_bw_cfg_list {
+	u16 vf_id;
+	u8 num_elem;
+#define VIRTCHNL_DCF_TARGET_TC_BW	0
+#define VIRTCHNL_DCF_TARGET_VF_BW	1
+	u8 node_type;
+	struct virtchnl_dcf_bw_cfg cfg[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_dcf_bw_cfg_list);
+
 struct virtchnl_supported_rxdids {
 	/* see enum virtchnl_rx_desc_id_bitmasks */
 	u64 supported_rxdids;
@@ -1768,6 +1815,62 @@ struct virtchnl_fdir_del {
 
 VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
 
+/* VIRTCHNL_OP_GET_QOS_CAPS
+ * VF sends this message to get its QoS Caps, such as
+ * TC number, Arbiter and Bandwidth.
+ */
+struct virtchnl_qos_cap_elem {
+	u8 tc_num;
+	u8 tc_prio;
+#define VIRTCHNL_ABITER_STRICT      0
+#define VIRTCHNL_ABITER_ETS         2
+	u8 arbiter;
+#define VIRTCHNL_STRICT_WEIGHT      1
+	u8 weight;
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem);
+
+struct virtchnl_qos_cap_list {
+	u16 vsi_id;
+	u16 num_elem;
+	struct virtchnl_qos_cap_elem cap[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_qos_cap_list);
+
+/* VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP
+ * VF sends message virtchnl_queue_tc_mapping to set queue to tc
+ * mapping for all the Tx and Rx queues with a specified VSI, and
+ * would get response about bitmap of valid user priorities
+ * associated with queues.
+ */
+struct virtchnl_queue_tc_mapping {
+	u16 vsi_id;
+	u16 num_tc;
+	u16 num_queue_pairs;
+	u8 pad[2];
+	union {
+		struct {
+			u16 start_queue_id;
+			u16 queue_count;
+		} req;
+		struct {
+#define VIRTCHNL_USER_PRIO_TYPE_UP	0
+#define VIRTCHNL_USER_PRIO_TYPE_DSCP	1
+			u16 prio_type;
+			u16 valid_prio_bitmap;
+		} resp;
+	} tc[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_tc_mapping);
+
 /* VIRTCHNL_OP_QUERY_FDIR_FILTER
  * VF sends this request to PF by filling out vsi_id,
  * flow_id and reset_counter. PF will return query_info
@@ -2118,6 +2221,19 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DCF_GET_VSI_MAP:
 	case VIRTCHNL_OP_DCF_GET_PKG_INFO:
 		break;
+	case VIRTCHNL_OP_DCF_CONFIG_BW:
+		valid_len = sizeof(struct virtchnl_dcf_bw_cfg_list);
+		if (msglen >= valid_len) {
+			struct virtchnl_dcf_bw_cfg_list *cfg_list =
+				(struct virtchnl_dcf_bw_cfg_list *)msg;
+			if (cfg_list->num_elem == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (cfg_list->num_elem - 1) *
+					 sizeof(struct virtchnl_dcf_bw_cfg);
+		}
+		break;
 	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		break;
 	case VIRTCHNL_OP_ADD_RSS_CFG:
@@ -2133,6 +2249,21 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_QUERY_FDIR_FILTER:
 		valid_len = sizeof(struct virtchnl_fdir_query);
 		break;
+	case VIRTCHNL_OP_GET_QOS_CAPS:
+		break;
+	case VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP:
+		valid_len = sizeof(struct virtchnl_queue_tc_mapping);
+		if (msglen >= valid_len) {
+			struct virtchnl_queue_tc_mapping *q_tc =
+				(struct virtchnl_queue_tc_mapping *)msg;
+			if (q_tc->num_tc == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (q_tc->num_tc - 1) *
+					 sizeof(q_tc->tc[0]);
+		}
+		break;
 	case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
 		break;
 	case VIRTCHNL_OP_ADD_VLAN_V2:
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 2/7] net/ice/base: support DCF query port ETS adminq
  2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
@ 2021-07-01 10:20   ` Ting Xu
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 3/7] net/ice: support DCF link status event handling Ting Xu
                     ` (5 subsequent siblings)
  7 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 10:20 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

In the adminq command query port ETS function, the root node teid is
needed. However, for DCF, the root node is not initialized, which
causes an error when the variable is dereferenced. In this patch, check
whether the root node is available before using it.

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/base/ice_dcb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ice/base/ice_dcb.c b/drivers/net/ice/base/ice_dcb.c
index c73fc095ff..9c9675f6ef 100644
--- a/drivers/net/ice/base/ice_dcb.c
+++ b/drivers/net/ice/base/ice_dcb.c
@@ -1524,7 +1524,8 @@ ice_aq_query_port_ets(struct ice_port_info *pi,
 		return ICE_ERR_PARAM;
 	cmd = &desc.params.port_ets;
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
-	cmd->port_teid = pi->root->info.node_teid;
+	if (pi->root)
+		cmd->port_teid = pi->root->info.node_teid;
 
 	status = ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
 	return status;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 3/7] net/ice: support DCF link status event handling
  2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 2/7] net/ice/base: support DCF query port ETS adminq Ting Xu
@ 2021-07-01 10:20   ` Ting Xu
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 4/7] net/ice: support QoS config VF bandwidth in DCF Ting Xu
                     ` (4 subsequent siblings)
  7 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 10:20 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

When the link status changes, DCF receives a virtchnl PF event message.
Add support to handle this event: change the link status and update the
link info.
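
On the application side, this event surfaces through the standard
ethdev LSC callback; a minimal consumer sketch (illustrative only,
assuming LSC interrupts were enabled via dev_conf.intr_conf.lsc):

#include <stdio.h>
#include <rte_ethdev.h>

static int
lsc_event_cb(uint16_t port_id, enum rte_eth_event_type event,
	     void *cb_arg, void *ret_param)
{
	struct rte_eth_link link;

	RTE_SET_USED(event);
	RTE_SET_USED(cb_arg);
	RTE_SET_USED(ret_param);

	/* reads the status cached by rte_eth_linkstatus_set() below */
	rte_eth_link_get_nowait(port_id, &link);
	printf("port %u link %s, speed %u Mbps\n", port_id,
	       link.link_status ? "up" : "down", link.link_speed);
	return 0;
}

/* registered once after probing:
 * rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *				 lsc_event_cb, NULL);
 */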

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.h        |  6 ++++
 drivers/net/ice/ice_dcf_ethdev.c | 54 ++++++++++++++++++++++++++++++--
 drivers/net/ice/ice_dcf_parent.c | 51 ++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 0cb90b5e9f..587093b909 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -60,6 +60,10 @@ struct ice_dcf_hw {
 	uint16_t nb_msix;
 	uint16_t rxq_map[16];
 	struct virtchnl_eth_stats eth_stats_offset;
+
+	/* Link status */
+	bool link_up;
+	uint32_t link_speed;
 };
 
 int ice_dcf_execute_virtchnl_cmd(struct ice_dcf_hw *hw,
@@ -77,5 +81,7 @@ int ice_dcf_disable_queues(struct ice_dcf_hw *hw);
 int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 			struct virtchnl_eth_stats *pstats);
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
+int ice_dcf_link_update(struct rte_eth_dev *dev,
+		    __rte_unused int wait_to_complete);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index f73dc80bd9..0b40ebbec6 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -881,11 +881,59 @@ ice_dcf_dev_close(struct rte_eth_dev *dev)
 	return 0;
 }
 
-static int
-ice_dcf_link_update(__rte_unused struct rte_eth_dev *dev,
+int
+ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete)
 {
-	return 0;
+	struct ice_dcf_adapter *ad = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &ad->real_hw;
+	struct rte_eth_link new_link;
+
+	memset(&new_link, 0, sizeof(new_link));
+
+	/* Only read status info stored in VF, and the info is updated
+	 * when a LINK_CHANGE event is received from the PF via virtchnl.
+	 */
+	switch (hw->link_speed) {
+	case 10:
+		new_link.link_speed = ETH_SPEED_NUM_10M;
+		break;
+	case 100:
+		new_link.link_speed = ETH_SPEED_NUM_100M;
+		break;
+	case 1000:
+		new_link.link_speed = ETH_SPEED_NUM_1G;
+		break;
+	case 10000:
+		new_link.link_speed = ETH_SPEED_NUM_10G;
+		break;
+	case 20000:
+		new_link.link_speed = ETH_SPEED_NUM_20G;
+		break;
+	case 25000:
+		new_link.link_speed = ETH_SPEED_NUM_25G;
+		break;
+	case 40000:
+		new_link.link_speed = ETH_SPEED_NUM_40G;
+		break;
+	case 50000:
+		new_link.link_speed = ETH_SPEED_NUM_50G;
+		break;
+	case 100000:
+		new_link.link_speed = ETH_SPEED_NUM_100G;
+		break;
+	default:
+		new_link.link_speed = ETH_SPEED_NUM_NONE;
+		break;
+	}
+
+	new_link.link_duplex = ETH_LINK_FULL_DUPLEX;
+	new_link.link_status = hw->link_up ? ETH_LINK_UP :
+					     ETH_LINK_DOWN;
+	new_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+				ETH_LINK_SPEED_FIXED);
+
+	return rte_eth_linkstatus_set(dev, &new_link);
 }
 
 /* Add UDP tunneling port */
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 19420a0f58..788f6dd2a0 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -178,6 +178,44 @@ start_vsi_reset_thread(struct ice_dcf_hw *dcf_hw, bool vfr, uint16_t vf_id)
 	}
 }
 
+static uint32_t
+ice_dcf_convert_link_speed(enum virtchnl_link_speed virt_link_speed)
+{
+	uint32_t speed;
+
+	switch (virt_link_speed) {
+	case VIRTCHNL_LINK_SPEED_100MB:
+		speed = 100;
+		break;
+	case VIRTCHNL_LINK_SPEED_1GB:
+		speed = 1000;
+		break;
+	case VIRTCHNL_LINK_SPEED_10GB:
+		speed = 10000;
+		break;
+	case VIRTCHNL_LINK_SPEED_40GB:
+		speed = 40000;
+		break;
+	case VIRTCHNL_LINK_SPEED_20GB:
+		speed = 20000;
+		break;
+	case VIRTCHNL_LINK_SPEED_25GB:
+		speed = 25000;
+		break;
+	case VIRTCHNL_LINK_SPEED_2_5GB:
+		speed = 2500;
+		break;
+	case VIRTCHNL_LINK_SPEED_5GB:
+		speed = 5000;
+		break;
+	default:
+		speed = 0;
+		break;
+	}
+
+	return speed;
+}
+
 void
 ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 			    uint8_t *msg, uint16_t msglen)
@@ -196,6 +234,19 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 		break;
 	case VIRTCHNL_EVENT_LINK_CHANGE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event");
+		dcf_hw->link_up = pf_msg->event_data.link_event.link_status;
+		if (dcf_hw->vf_res->vf_cap_flags &
+			VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+			dcf_hw->link_speed =
+				pf_msg->event_data.link_event_adv.link_speed;
+		} else {
+			enum virtchnl_link_speed speed;
+			speed = pf_msg->event_data.link_event.link_speed;
+			dcf_hw->link_speed = ice_dcf_convert_link_speed(speed);
+		}
+		ice_dcf_link_update(dcf_hw->eth_dev, 0);
+		rte_eth_dev_callback_process(dcf_hw->eth_dev,
+			RTE_ETH_EVENT_INTR_LSC, NULL);
 		break;
 	case VIRTCHNL_EVENT_PF_DRIVER_CLOSE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event");
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 4/7] net/ice: support QoS config VF bandwidth in DCF
  2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (2 preceding siblings ...)
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 3/7] net/ice: support DCF link status event handling Ting Xu
@ 2021-07-01 10:20   ` Ting Xu
  2021-07-07  9:23     ` Thomas Monjalon
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 5/7] net/iavf: query QoS cap and set queue TC mapping Ting Xu
                     ` (3 subsequent siblings)
  7 siblings, 1 reply; 44+ messages in thread
From: Ting Xu @ 2021-07-01 10:20 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch supports the ETS-based QoS configuration. It enables the
DCF to configure bandwidth limits for each VF VSI on a per-TC basis. A
hierarchical scheduler tree is built with port, TC and VSI nodes.
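
For reference, the per-VF limit is driven from the DCF port through the
generic rte_tm API; a minimal sketch (illustrative only, not part of
the patch; it assumes DCF port 0, an already-added TC node with the
arbitrary id 200, VSI node id 0 standing for VF 0, and rates in bytes
per second as rte_tm defines them):

#include <string.h>
#include <rte_tm.h>

static int
cap_vf0_to_1gbps(uint16_t port_id)
{
	struct rte_tm_shaper_params sp;
	struct rte_tm_node_params np;
	struct rte_tm_error err;
	int ret;

	/* one shaper profile: 1 Gbps committed and peak rate */
	memset(&sp, 0, sizeof(sp));
	sp.committed.rate = 125000000; /* bytes/sec */
	sp.peak.rate = 125000000;
	ret = rte_tm_shaper_profile_add(port_id, 1, &sp, &err);
	if (ret)
		return ret;

	/* VSI node for VF 0 at level 2 under TC node 200,
	 * carrying the shaper profile
	 */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = 1;
	np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
	return rte_tm_node_add(port_id, 0, 200, 0, 1, 2, &np, &err);
}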

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.c        |   9 +-
 drivers/net/ice/ice_dcf.h        |  48 ++
 drivers/net/ice/ice_dcf_ethdev.c |  14 +
 drivers/net/ice/ice_dcf_ethdev.h |   3 +
 drivers/net/ice/ice_dcf_parent.c |  30 ++
 drivers/net/ice/ice_dcf_sched.c  | 759 +++++++++++++++++++++++++++++++
 drivers/net/ice/meson.build      |   3 +-
 7 files changed, 864 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index d72a6f357e..349d23ee4f 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -235,7 +235,8 @@ ice_dcf_get_vf_resource(struct ice_dcf_hw *hw)
 	caps = VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | VIRTCHNL_VF_OFFLOAD_RX_POLLING |
 	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED | VIRTCHNL_VF_CAP_DCF |
 	       VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
+	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC |
+	       VIRTCHNL_VF_OFFLOAD_QOS;
 
 	err = ice_dcf_send_cmd_req_no_irq(hw, VIRTCHNL_OP_GET_VF_RESOURCES,
 					  (uint8_t *)&caps, sizeof(caps));
@@ -668,6 +669,9 @@ ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 		}
 	}
 
+	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+		ice_dcf_tm_conf_init(eth_dev);
+
 	hw->eth_dev = eth_dev;
 	rte_intr_callback_register(&pci_dev->intr_handle,
 				   ice_dcf_dev_interrupt_handler, hw);
@@ -703,6 +707,9 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 	ice_dcf_mode_disable(hw);
 	iavf_shutdown_adminq(&hw->avf);
 
+	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+		ice_dcf_tm_conf_uninit(eth_dev);
+
 	rte_free(hw->arq_buf);
 	rte_free(hw->vf_vsi_map);
 	rte_free(hw->vf_res);
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 587093b909..1c7653de3d 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -6,6 +6,7 @@
 #define _ICE_DCF_H_
 
 #include <ethdev_driver.h>
+#include <rte_tm_driver.h>
 
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
@@ -30,6 +31,49 @@ struct dcf_virtchnl_cmd {
 	volatile int pending;
 };
 
+struct ice_dcf_tm_shaper_profile {
+	TAILQ_ENTRY(ice_dcf_tm_shaper_profile) node;
+	uint32_t shaper_profile_id;
+	uint32_t reference_count;
+	struct rte_tm_shaper_params profile;
+};
+
+TAILQ_HEAD(ice_dcf_shaper_profile_list, ice_dcf_tm_shaper_profile);
+
+/* Struct to store Traffic Manager node configuration. */
+struct ice_dcf_tm_node {
+	TAILQ_ENTRY(ice_dcf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct ice_dcf_tm_node *parent;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(ice_dcf_tm_node_list, ice_dcf_tm_node);
+
+/* node type of Traffic Manager */
+enum ice_dcf_tm_node_type {
+	ICE_DCF_TM_NODE_TYPE_PORT,
+	ICE_DCF_TM_NODE_TYPE_TC,
+	ICE_DCF_TM_NODE_TYPE_VSI,
+	ICE_DCF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct ice_dcf_tm_conf {
+	struct ice_dcf_shaper_profile_list shaper_profile_list;
+	struct ice_dcf_tm_node *root; /* root node - port */
+	struct ice_dcf_tm_node_list tc_list; /* node list for all the TCs */
+	struct ice_dcf_tm_node_list vsi_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_vsi_node;
+	bool committed;
+};
+
 struct ice_dcf_hw {
 	struct iavf_hw avf;
 
@@ -45,6 +89,8 @@ struct ice_dcf_hw {
 	uint16_t *vf_vsi_map;
 	uint16_t pf_vsi_id;
 
+	struct ice_dcf_tm_conf tm_conf;
+	struct ice_aqc_port_ets_elem *ets_config;
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
@@ -83,5 +129,7 @@ int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
 int ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete);
+void ice_dcf_tm_conf_init(struct rte_eth_dev *dev);
+void ice_dcf_tm_conf_uninit(struct rte_eth_dev *dev);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 0b40ebbec6..69fe6e63d1 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -622,6 +622,7 @@ ice_dcf_dev_stop(struct rte_eth_dev *dev)
 	ice_dcf_add_del_all_mac_addr(&dcf_ad->real_hw, false);
 	dev->data->dev_link.link_status = ETH_LINK_DOWN;
 	ad->pf.adapter_stopped = 1;
+	dcf_ad->real_hw.tm_conf.committed = false;
 
 	return 0;
 }
@@ -994,6 +995,18 @@ ice_dcf_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
 	return ret;
 }
 
+static int
+ice_dcf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+		void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &ice_dcf_tm_ops;
+
+	return 0;
+}
+
 static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.dev_start               = ice_dcf_dev_start,
 	.dev_stop                = ice_dcf_dev_stop,
@@ -1018,6 +1031,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.flow_ops_get            = ice_dcf_dev_flow_ops_get,
 	.udp_tunnel_port_add	 = ice_dcf_dev_udp_tunnel_port_add,
 	.udp_tunnel_port_del	 = ice_dcf_dev_udp_tunnel_port_del,
+	.tm_ops_get              = ice_dcf_tm_ops_get,
 };
 
 static int
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index e7c9d7fe41..8510e37119 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -7,6 +7,8 @@
 
 #include "base/ice_common.h"
 #include "base/ice_adminq_cmd.h"
+#include "base/ice_dcb.h"
+#include "base/ice_sched.h"
 
 #include "ice_ethdev.h"
 #include "ice_dcf.h"
@@ -52,6 +54,7 @@ struct ice_dcf_vf_repr {
 	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN */
 };
 
+extern const struct rte_tm_ops ice_dcf_tm_ops;
 void ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 				 uint8_t *msg, uint16_t msglen);
 int ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 788f6dd2a0..0ea32cf8e9 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -264,6 +264,29 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 	}
 }
 
+static int
+ice_dcf_query_port_ets(struct ice_hw *parent_hw, struct ice_dcf_hw *real_hw)
+{
+	int ret;
+
+	real_hw->ets_config = (struct ice_aqc_port_ets_elem *)
+			ice_malloc(real_hw, sizeof(*real_hw->ets_config));
+	if (!real_hw->ets_config)
+		return ICE_ERR_NO_MEMORY;
+
+	ret = ice_aq_query_port_ets(parent_hw->port_info,
+			real_hw->ets_config, sizeof(*real_hw->ets_config),
+			NULL);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "DCF Query Port ETS failed");
+		rte_free(real_hw->ets_config);
+		real_hw->ets_config = NULL;
+		return ret;
+	}
+
+	return ICE_SUCCESS;
+}
+
 static int
 ice_dcf_init_parent_hw(struct ice_hw *hw)
 {
@@ -486,6 +509,13 @@ ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev)
 		return err;
 	}
 
+	err = ice_dcf_query_port_ets(parent_hw, hw);
+	if (err) {
+		PMD_INIT_LOG(ERR, "failed to query port ets with error %d",
+			     err);
+		goto uninit_hw;
+	}
+
 	err = ice_dcf_load_pkg(parent_hw);
 	if (err) {
 		PMD_INIT_LOG(ERR, "failed to load package with error %d",
diff --git a/drivers/net/ice/ice_dcf_sched.c b/drivers/net/ice/ice_dcf_sched.c
new file mode 100644
index 0000000000..cc7433c541
--- /dev/null
+++ b/drivers/net/ice/ice_dcf_sched.c
@@ -0,0 +1,759 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "base/ice_sched.h"
+#include "ice_dcf_ethdev.h"
+
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+				   uint32_t shaper_profile_id,
+				   struct rte_tm_error *error);
+
+const struct rte_tm_ops ice_dcf_tm_ops = {
+	.shaper_profile_add = ice_dcf_shaper_profile_add,
+	.shaper_profile_delete = ice_dcf_shaper_profile_del,
+	.hierarchy_commit = ice_dcf_hierarchy_commit,
+	.node_add = ice_dcf_node_add,
+	.node_delete = ice_dcf_node_delete,
+};
+
+void
+ice_dcf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+
+	/* initialize shaper profile list */
+	TAILQ_INIT(&hw->tm_conf.shaper_profile_list);
+
+	/* initialize node configuration */
+	hw->tm_conf.root = NULL;
+	TAILQ_INIT(&hw->tm_conf.tc_list);
+	TAILQ_INIT(&hw->tm_conf.vsi_list);
+	hw->tm_conf.nb_tc_node = 0;
+	hw->tm_conf.nb_vsi_node = 0;
+	hw->tm_conf.committed = false;
+}
+
+void
+ice_dcf_tm_conf_uninit(struct rte_eth_dev *dev)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	struct ice_dcf_tm_node *tm_node;
+
+	/* clear node configuration */
+	while ((tm_node = TAILQ_FIRST(&hw->tm_conf.vsi_list))) {
+		TAILQ_REMOVE(&hw->tm_conf.vsi_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	hw->tm_conf.nb_vsi_node = 0;
+	while ((tm_node = TAILQ_FIRST(&hw->tm_conf.tc_list))) {
+		TAILQ_REMOVE(&hw->tm_conf.tc_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	hw->tm_conf.nb_tc_node = 0;
+	if (hw->tm_conf.root) {
+		rte_free(hw->tm_conf.root);
+		hw->tm_conf.root = NULL;
+	}
+
+	/* Remove all shaper profiles */
+	while ((shaper_profile =
+	       TAILQ_FIRST(&hw->tm_conf.shaper_profile_list))) {
+		TAILQ_REMOVE(&hw->tm_conf.shaper_profile_list,
+			     shaper_profile, node);
+		rte_free(shaper_profile);
+	}
+}
+
+static inline struct ice_dcf_tm_node *
+ice_dcf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum ice_dcf_tm_node_type *node_type)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct ice_dcf_tm_node_list *tc_list = &hw->tm_conf.tc_list;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (hw->tm_conf.root && hw->tm_conf.root->id == node_id) {
+		*node_type = ICE_DCF_TM_NODE_TYPE_PORT;
+		return hw->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, vsi_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_VSI;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static inline struct ice_dcf_tm_shaper_profile *
+ice_dcf_shaper_profile_search(struct rte_eth_dev *dev,
+			   uint32_t shaper_profile_id)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_shaper_profile_list *shaper_profile_list =
+		&hw->tm_conf.shaper_profile_list;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	TAILQ_FOREACH(shaper_profile, shaper_profile_list, node) {
+		if (shaper_profile_id == shaper_profile->shaper_profile_id)
+			return shaper_profile;
+	}
+
+	return NULL;
+}
+
+static int
+ice_dcf_node_param_check(struct ice_dcf_hw *hw, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* not support shared shaper */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= 8 * hw->num_vfs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type parent_node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_tm_shaper_profile *shaper_profile = NULL;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *parent_node;
+	struct ice_dcf_tm_node *tm_node;
+	uint16_t tc_nb = 1;
+	int i, ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = ice_dcf_node_param_check(hw, node_id, priority, weight,
+				   params, error);
+	if (ret)
+		return ret;
+
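+	/* count the TCs enabled in the port ETS config; TC0 is always
+	 * valid, hence tc_nb starts at 1
+	 */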
+	for (i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		if (hw->ets_config->tc_valid_bits & (1 << i))
+			tc_nb++;
+	}
+
+	/* check if the node already exists */
+	if (ice_dcf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* check the shaper profile id */
+	if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE) {
+		shaper_profile = ice_dcf_shaper_profile_search(dev,
+			params->shaper_profile_id);
+		if (!shaper_profile) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+			error->message = "shaper profile not exist";
+			return -EINVAL;
+		}
+	}
+
+	/* add the root node if it has no parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != ICE_DCF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (hw->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("ice_dcf_tm_node",
+				      sizeof(struct ice_dcf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		hw->tm_conf.root = tm_node;
+
+		return 0;
+	}
+
+	/* TC or vsi node */
+	/* check the parent node */
+	parent_node = ice_dcf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != ICE_DCF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != ICE_DCF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not port or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != (uint32_t)(parent_node_type + 1)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the TC node number */
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (hw->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the vsi node number */
+		if (parent_node->reference_count >= hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many VSI for one TC";
+			return -EINVAL;
+		}
+		/* check the vsi node id */
+		if (node_id > tc_nb * hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large VSI id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or vsi node */
+	tm_node = rte_zmalloc("ice_dcf_tm_node",
+			      sizeof(struct ice_dcf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->priority = priority;
+	tm_node->weight = weight;
+	tm_node->shaper_profile = shaper_profile;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = hw->tm_conf.nb_tc_node;
+		hw->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.vsi_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		hw->tm_conf.nb_vsi_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	/* increase the reference counter of the shaper profile */
+	if (shaper_profile)
+		shaper_profile->reference_count++;
+
+	return 0;
+}
+
+static int
+ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = ice_dcf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		if (tm_node->shaper_profile)
+			tm_node->shaper_profile->reference_count--;
+		rte_free(tm_node);
+		hw->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or VSI node */
+	if (tm_node->shaper_profile)
+		tm_node->shaper_profile->reference_count--;
+	tm_node->parent->reference_count--;
+	if (node_type == ICE_DCF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&hw->tm_conf.tc_list, tm_node, node);
+		hw->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&hw->tm_conf.vsi_list, tm_node, node);
+		hw->tm_conf.nb_vsi_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_param_check(struct rte_tm_shaper_params *profile,
+				struct rte_tm_error *error)
+{
+	/* min bucket size not supported */
+	if (profile->committed.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE;
+		error->message = "committed bucket size not supported";
+		return -EINVAL;
+	}
+	/* max bucket size not supported */
+	if (profile->peak.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE;
+		error->message = "peak bucket size not supported";
+		return -EINVAL;
+	}
+	/* length adjustment not supported */
+	if (profile->pkt_length_adjust) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN;
+		error->message = "packet length adjustment not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	int ret;
+
+	if (!profile || !error)
+		return -EINVAL;
+
+	ret = ice_dcf_shaper_profile_param_check(profile, error);
+	if (ret)
+		return ret;
+
+	shaper_profile = ice_dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID exist";
+		return -EINVAL;
+	}
+
+	shaper_profile = rte_zmalloc("ice_dcf_tm_shaper_profile",
+				     sizeof(struct ice_dcf_tm_shaper_profile),
+				     0);
+	if (!shaper_profile)
+		return -ENOMEM;
+	shaper_profile->shaper_profile_id = shaper_profile_id;
+	rte_memcpy(&shaper_profile->profile, profile,
+			 sizeof(struct rte_tm_shaper_params));
+	TAILQ_INSERT_TAIL(&hw->tm_conf.shaper_profile_list,
+			  shaper_profile, node);
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	if (!error)
+		return -EINVAL;
+
+	shaper_profile = ice_dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (!shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID not exist";
+		return -EINVAL;
+	}
+
+	/* don't delete a profile if it's used by one or several nodes */
+	if (shaper_profile->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE;
+		error->message = "profile in use";
+		return -EINVAL;
+	}
+
+	TAILQ_REMOVE(&hw->tm_conf.shaper_profile_list, shaper_profile, node);
+	rte_free(shaper_profile);
+
+	return 0;
+}
+
+static int
+ice_dcf_set_vf_bw(struct ice_dcf_hw *hw,
+			struct virtchnl_dcf_bw_cfg_list *vf_bw,
+			uint16_t len)
+{
+	struct dcf_virtchnl_cmd args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.v_op = VIRTCHNL_OP_DCF_CONFIG_BW;
+	args.req_msg = (uint8_t *)vf_bw;
+	args.req_msglen = len;
+	err = ice_dcf_execute_virtchnl_cmd(hw, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "fail to execute command %s",
+			    "VIRTCHNL_OP_DCF_CONFIG_BW");
+	return err;
+}
+
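+/*
+ * Validate the per-TC shaper values against the port rate:
+ * - once any TC has a CIR set, every following TC must have a PIR;
+ * - a TC's CIR plus the sum of the other TCs' PIRs must fit within
+ *   the port bandwidth, and a non-zero CIR must be at least 500 Kbps;
+ * - a CIR cannot exceed its PIR, and a PIR cannot exceed the port rate.
+ */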
+static int
+ice_dcf_validate_tc_bw(struct virtchnl_dcf_bw_cfg_list *tc_bw,
+			uint32_t port_bw)
+{
+	struct virtchnl_dcf_bw_cfg *cfg;
+	bool lowest_cir_mark = false;
+	u32 total_peak, rest_peak;
+	u32 committed, peak;
+	int i;
+
+	total_peak = 0;
+	for (i = 0; i < tc_bw->num_elem; i++)
+		total_peak += tc_bw->cfg[i].shaper.peak;
+
+	for (i = 0; i < tc_bw->num_elem; i++) {
+		cfg = &tc_bw->cfg[i];
+		peak = cfg->shaper.peak;
+		committed = cfg->shaper.committed;
+		rest_peak = total_peak - peak;
+
+		if (lowest_cir_mark && peak == 0) {
+			PMD_DRV_LOG(ERR, "Max bandwidth must be configured for TC%u",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (!lowest_cir_mark && committed)
+			lowest_cir_mark = true;
+
+		if (committed && committed + rest_peak > port_bw) {
+			PMD_DRV_LOG(ERR, "Total value of TC%u min bandwidth and other TCs' max bandwidth %ukbps should be less than port link speed %ukbps",
+				cfg->tc_num, committed + rest_peak, port_bw);
+			return -EINVAL;
+		}
+
+		if (committed && committed < ICE_SCHED_MIN_BW) {
+			PMD_DRV_LOG(ERR, "If TC%u min Tx bandwidth is set, it cannot be less than 500Kbps",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (peak && committed > peak) {
+			PMD_DRV_LOG(ERR, "TC%u Min Tx bandwidth cannot be greater than max Tx bandwidth",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (peak > port_bw) {
+			PMD_DRV_LOG(ERR, "TC%u max Tx bandwidth %uKbps is greater than current link speed %uKbps",
+				cfg->tc_num, peak, port_bw);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
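+
+/*
+ * Push the committed hierarchy to hardware: send one
+ * VIRTCHNL_OP_DCF_CONFIG_BW message per VF with its per-TC shaper
+ * values, then one aggregate message for the TC nodes themselves.
+ */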
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct virtchnl_dcf_bw_cfg_list *vf_bw;
+	struct virtchnl_dcf_bw_cfg_list *tc_bw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct rte_tm_shaper_params *profile;
+	struct ice_dcf_tm_node *tm_node;
+	uint32_t port_bw, cir_total;
+	uint16_t size, vf_id;
+	uint8_t num_elem = 0;
+	int i, ret_val = ICE_SUCCESS;
+
+	/* check if all TC nodes are set */
+	if (BIT(hw->tm_conf.nb_tc_node) & hw->ets_config->tc_valid_bits) {
+		PMD_DRV_LOG(ERR, "Not all enabled TC nodes are set");
+		ret_val = ICE_ERR_PARAM;
+		goto fail_clear;
+	}
+
+	size = sizeof(struct virtchnl_dcf_bw_cfg_list) +
+		sizeof(struct virtchnl_dcf_bw_cfg) *
+		(hw->tm_conf.nb_tc_node - 1);
+	vf_bw = rte_zmalloc("vf_bw", size, 0);
+	if (!vf_bw) {
+		ret_val = ICE_ERR_NO_MEMORY;
+		goto fail_clear;
+	}
+	tc_bw = rte_zmalloc("tc_bw", size, 0);
+	if (!tc_bw) {
+		ret_val = ICE_ERR_NO_MEMORY;
+		goto fail_clear;
+	}
+
+	/* port bandwidth (Kbps) */
+	port_bw = hw->link_speed * 1000;
+	cir_total = 0;
+
+	/* init tc bw configuration */
+#define ICE_DCF_SCHED_TC_NODE 0xffff
+	tc_bw->vf_id = ICE_DCF_SCHED_TC_NODE;
+	tc_bw->node_type = VIRTCHNL_DCF_TARGET_TC_BW;
+	tc_bw->num_elem = hw->tm_conf.nb_tc_node;
+	for (i = 0; i < tc_bw->num_elem; i++) {
+		tc_bw->cfg[i].tc_num = i;
+		tc_bw->cfg[i].type = VIRTCHNL_BW_SHAPER;
+		tc_bw->cfg[i].bw_type |=
+			VIRTCHNL_DCF_BW_PIR | VIRTCHNL_DCF_BW_CIR;
+	}
+
+	for (vf_id = 0; vf_id < hw->num_vfs; vf_id++) {
+		num_elem = 0;
+		vf_bw->vf_id = vf_id;
+		vf_bw->node_type = VIRTCHNL_DCF_TARGET_VF_BW;
+		TAILQ_FOREACH(tm_node, vsi_list, node) {
+			/* scan the nodes belonging to one VSI */
+			if (tm_node->id - hw->num_vfs * tm_node->tc != vf_id)
+				continue;
+			vf_bw->cfg[num_elem].tc_num = tm_node->tc;
+			vf_bw->cfg[num_elem].type = VIRTCHNL_BW_SHAPER;
+			if (tm_node->shaper_profile) {
+				/* Convert from bytes per second to Kbps */
+				profile = &tm_node->shaper_profile->profile;
+				vf_bw->cfg[num_elem].shaper.peak =
+				profile->peak.rate / 1000 * BITS_PER_BYTE;
+				vf_bw->cfg[num_elem].shaper.committed =
+				profile->committed.rate / 1000 * BITS_PER_BYTE;
+				vf_bw->cfg[num_elem].bw_type |=
+					VIRTCHNL_DCF_BW_PIR |
+					VIRTCHNL_DCF_BW_CIR;
+			}
+
+			/* update tc node bw configuration */
+			tc_bw->cfg[tm_node->tc].shaper.peak +=
+				vf_bw->cfg[num_elem].shaper.peak;
+			tc_bw->cfg[tm_node->tc].shaper.committed +=
+				vf_bw->cfg[num_elem].shaper.committed;
+
+			cir_total += vf_bw->cfg[num_elem].shaper.committed;
+			num_elem++;
+		}
+
+		/* check if all TC nodes are set with VF vsi nodes */
+		if (num_elem != hw->tm_conf.nb_tc_node) {
+			PMD_DRV_LOG(ERR, "VF%u vsi nodes are not set to all TC nodes, node id should be continuous",
+				    vf_id);
+			ret_val = ICE_ERR_PARAM;
+			goto fail_clear;
+		}
+
+		vf_bw->num_elem = num_elem;
+		ret_val = ice_dcf_set_vf_bw(hw, vf_bw, size);
+		if (ret_val)
+			goto fail_clear;
+		memset(vf_bw, 0, size);
+	}
+
+	/* check if total CIR is larger than port bandwidth */
+	if (cir_total > port_bw) {
+		PMD_DRV_LOG(ERR, "Total CIR of all VFs is larger than port bandwidth");
+		ret_val = ICE_ERR_PARAM;
+		goto fail_clear;
+	}
+
+	/* check and commit tc node bw configuration */
+	ret_val = ice_dcf_validate_tc_bw(tc_bw, port_bw);
+	if (ret_val)
+		goto fail_clear;
+	ret_val = ice_dcf_set_vf_bw(hw, tc_bw, size);
+	if (ret_val)
+		goto fail_clear;
+
+	hw->tm_conf.committed = true;
+	return ret_val;
+
+fail_clear:
+	/* clear all the traffic manager configuration */
+	if (clear_on_fail) {
+		ice_dcf_tm_conf_uninit(dev);
+		ice_dcf_tm_conf_init(dev);
+	}
+	return ret_val;
+}
diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build
index 65750d3501..0b86d74a49 100644
--- a/drivers/net/ice/meson.build
+++ b/drivers/net/ice/meson.build
@@ -70,6 +70,7 @@ endif
 sources += files('ice_dcf.c',
          'ice_dcf_vf_representor.c',
          'ice_dcf_ethdev.c',
-         'ice_dcf_parent.c')
+         'ice_dcf_parent.c',
+         'ice_dcf_sched.c')
 
 headers = files('rte_pmd_ice.h')
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 5/7] net/iavf: query QoS cap and set queue TC mapping
  2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (3 preceding siblings ...)
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 4/7] net/ice: support QoS config VF bandwidth in DCF Ting Xu
@ 2021-07-01 10:20   ` Ting Xu
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 6/7] net/iavf: check Tx packet with correct UP and queue Ting Xu
                     ` (2 subsequent siblings)
  7 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 10:20 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch adds support for a VF to configure ETS-based Tx QoS,
including querying the current QoS configuration from the PF and
configuring the queue-to-TC mapping. PF QoS is configured in advance,
and the queried info is provided to the user for later use. VF queues
are mapped to the different TCs in the PF through virtchnl. A usage
sketch follows the '---' marker below.

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
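Usage sketch (not from this patch): the mapping is driven through the
generic rte_tm API before the port is started. A minimal sketch,
assuming a VF with 8 queue pairs and a PF reporting two enabled TCs;
the non-leaf node ids (1000, 900, 901) and the 4-queues-per-TC split
are arbitrary choices for illustration:

  #include <string.h>
  #include <rte_tm.h>

  static int
  vf_tm_setup(uint16_t port_id)
  {
      struct rte_tm_node_params np;
      struct rte_tm_error err;
      uint32_t q;
      int ret;

      /* non-leaf nodes: priority 0, weight 1, one SP priority;
       * shaper_profile_id stays 0, since this PMD takes no shaper here
       */
      memset(&np, 0, sizeof(np));
      np.nonleaf.n_sp_priorities = 1;

      /* port (root) node at level 0 */
      ret = rte_tm_node_add(port_id, 1000, RTE_TM_NODE_ID_NULL,
                            0, 1, 0, &np, &err);
      if (ret)
          return ret;

      /* two TC nodes at level 1 */
      rte_tm_node_add(port_id, 900, 1000, 0, 1, 1, &np, &err);
      rte_tm_node_add(port_id, 901, 1000, 0, 1, 1, &np, &err);

      /* queue (leaf) nodes at level 2, node id == queue id:
       * queues 0-3 on TC0, queues 4-7 on TC1
       */
      memset(&np, 0, sizeof(np));
      np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
      for (q = 0; q < 8; q++)
          rte_tm_node_add(port_id, q, q < 4 ? 900 : 901,
                          0, 1, 2, &np, &err);

      /* pushes the queue-to-TC map to the PF via virtchnl */
      return rte_tm_hierarchy_commit(port_id, 1, &err);
  }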
 drivers/net/iavf/iavf.h        |  46 +++
 drivers/net/iavf/iavf_ethdev.c |  34 ++
 drivers/net/iavf/iavf_tm.c     | 724 +++++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c  |  56 ++-
 drivers/net/iavf/meson.build   |   1 +
 5 files changed, 860 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 4f5811ae87..feb8337b55 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -6,6 +6,8 @@
 #define _IAVF_ETHDEV_H_
 
 #include <rte_kvargs.h>
+#include <rte_tm_driver.h>
+
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
 #include <iavf_type.h>
@@ -82,6 +84,8 @@
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
 
+#define IAVF_BITS_PER_BYTE 8
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -129,6 +133,38 @@ enum iavf_aq_result {
 	IAVF_MSG_CMD,      /* Read async command result */
 };
 
+/* Struct to store Traffic Manager node configuration. */
+struct iavf_tm_node {
+	TAILQ_ENTRY(iavf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct iavf_tm_node *parent;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(iavf_tm_node_list, iavf_tm_node);
+
+/* node type of Traffic Manager */
+enum iavf_tm_node_type {
+	IAVF_TM_NODE_TYPE_PORT,
+	IAVF_TM_NODE_TYPE_TC,
+	IAVF_TM_NODE_TYPE_QUEUE,
+	IAVF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct iavf_tm_conf {
+	struct iavf_tm_node *root; /* root node - vf vsi */
+	struct iavf_tm_node_list tc_list; /* node list for all the TCs */
+	struct iavf_tm_node_list queue_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_queue_node;
+	bool committed;
+};
+
 /* Structure to store private data specific for VF instance. */
 struct iavf_info {
 	uint16_t num_queue_pairs;
@@ -175,6 +211,9 @@ struct iavf_info {
 	struct iavf_fdir_info fdir; /* flow director info */
 	/* indicate large VF support enabled or not */
 	bool lv_enabled;
+
+	struct virtchnl_qos_cap_list *qos_cap;
+	struct iavf_tm_conf tm_conf;
 };
 
 #define IAVF_MAX_PKT_TYPE 1024
@@ -344,4 +383,11 @@ int iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			uint32_t mc_addrs_num, bool add);
 int iavf_request_queues(struct iavf_adapter *adapter, uint16_t num);
 int iavf_get_max_rss_queue_region(struct iavf_adapter *adapter);
+int iavf_get_qos_cap(struct iavf_adapter *adapter);
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+			struct virtchnl_queue_tc_mapping *q_tc_mapping,
+			uint16_t size);
+void iavf_tm_conf_init(struct rte_eth_dev *dev);
+void iavf_tm_conf_uninit(struct rte_eth_dev *dev);
+extern const struct rte_tm_ops iavf_tm_ops;
 #endif /* _IAVF_ETHDEV_H_ */
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 5290588b17..93f82773e2 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -122,6 +122,7 @@ static int iavf_dev_flow_ops_get(struct rte_eth_dev *dev,
 static int iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
 			uint32_t mc_addrs_num);
+static int iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg);
 
 static const struct rte_pci_id pci_id_iavf_map[] = {
 	{ RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_ADAPTIVE_VF) },
@@ -200,8 +201,21 @@ static const struct eth_dev_ops iavf_eth_dev_ops = {
 	.flow_ops_get               = iavf_dev_flow_ops_get,
 	.tx_done_cleanup	    = iavf_dev_tx_done_cleanup,
 	.get_monitor_addr           = iavf_get_monitor_addr,
+	.tm_ops_get                 = iavf_tm_ops_get,
 };
 
+static int
+iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+			void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &iavf_tm_ops;
+
+	return 0;
+}
+
 static int
 iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
@@ -806,6 +820,11 @@ iavf_dev_start(struct rte_eth_dev *dev)
 				      dev->data->nb_tx_queues);
 	num_queue_pairs = vf->num_queue_pairs;
 
+	if (iavf_get_qos_cap(adapter)) {
+		PMD_INIT_LOG(ERR, "Failed to get qos capability");
+		return -1;
+	}
+
 	if (iavf_init_queues(dev) != 0) {
 		PMD_DRV_LOG(ERR, "failed to do Queue init");
 		return -1;
@@ -891,6 +910,7 @@ iavf_dev_stop(struct rte_eth_dev *dev)
 
 	adapter->stopped = 1;
 	dev->data->dev_started = 0;
+	vf->tm_conf.committed = false;
 
 	return 0;
 }
@@ -2090,6 +2110,15 @@ iavf_init_vf(struct rte_eth_dev *dev)
 		PMD_INIT_LOG(ERR, "unable to allocate vf_res memory");
 		goto err_api;
 	}
+
+	bufsz = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+	vf->qos_cap = rte_zmalloc("qos_cap", bufsz, 0);
+	if (!vf->qos_cap) {
+		PMD_INIT_LOG(ERR, "unable to allocate qos_cap memory");
+		goto err_api;
+	}
+
 	if (iavf_get_vf_resource(adapter) != 0) {
 		PMD_INIT_LOG(ERR, "iavf_get_vf_config failed");
 		goto err_alloc;
@@ -2131,6 +2160,7 @@ iavf_init_vf(struct rte_eth_dev *dev)
 	rte_free(vf->rss_key);
 	rte_free(vf->rss_lut);
 err_alloc:
+	rte_free(vf->qos_cap);
 	rte_free(vf->vf_res);
 	vf->vsi_res = NULL;
 err_api:
@@ -2299,6 +2329,8 @@ iavf_dev_init(struct rte_eth_dev *eth_dev)
 
 	iavf_default_rss_disable(adapter);
 
+	iavf_tm_conf_init(eth_dev);
+
 	return 0;
 }
 
@@ -2338,6 +2370,8 @@ iavf_dev_close(struct rte_eth_dev *dev)
 				     iavf_dev_interrupt_handler, dev);
 	iavf_disable_irq0(hw);
 
+	iavf_tm_conf_uninit(dev);
+
 	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
 		if (vf->rss_lut) {
 			rte_free(vf->rss_lut);
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
new file mode 100644
index 0000000000..79b6bcb79d
--- /dev/null
+++ b/drivers/net/iavf/iavf_tm.c
@@ -0,0 +1,724 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "iavf.h"
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error);
+static int iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error);
+static int iavf_node_capabilities_get(struct rte_eth_dev *dev,
+				      uint32_t node_id,
+				      struct rte_tm_node_capabilities *cap,
+				      struct rte_tm_error *error);
+static int iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error);
+
+const struct rte_tm_ops iavf_tm_ops = {
+	.node_add = iavf_tm_node_add,
+	.node_delete = iavf_tm_node_delete,
+	.capabilities_get = iavf_tm_capabilities_get,
+	.level_capabilities_get = iavf_level_capabilities_get,
+	.node_capabilities_get = iavf_node_capabilities_get,
+	.node_type_get = iavf_node_type_get,
+	.hierarchy_commit = iavf_hierarchy_commit,
+};
+
+void
+iavf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	/* initialize node configuration */
+	vf->tm_conf.root = NULL;
+	TAILQ_INIT(&vf->tm_conf.tc_list);
+	TAILQ_INIT(&vf->tm_conf.queue_list);
+	vf->tm_conf.nb_tc_node = 0;
+	vf->tm_conf.nb_queue_node = 0;
+	vf->tm_conf.committed = false;
+}
+
+void
+iavf_tm_conf_uninit(struct rte_eth_dev *dev)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_node *tm_node;
+
+	/* clear node configuration */
+	while ((tm_node = TAILQ_FIRST(&vf->tm_conf.queue_list))) {
+		TAILQ_REMOVE(&vf->tm_conf.queue_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	vf->tm_conf.nb_queue_node = 0;
+	while ((tm_node = TAILQ_FIRST(&vf->tm_conf.tc_list))) {
+		TAILQ_REMOVE(&vf->tm_conf.tc_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	vf->tm_conf.nb_tc_node = 0;
+	if (vf->tm_conf.root) {
+		rte_free(vf->tm_conf.root);
+		vf->tm_conf.root = NULL;
+	}
+}
+
+static inline struct iavf_tm_node *
+iavf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum iavf_tm_node_type *node_type)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_node_list *tc_list = &vf->tm_conf.tc_list;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+
+	if (vf->tm_conf.root && vf->tm_conf.root->id == node_id) {
+		*node_type = IAVF_TM_NODE_TYPE_PORT;
+		return vf->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_QUEUE;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static int
+iavf_node_param_check(struct iavf_info *vf, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* shaper profile is not supported */
+	if (params->shaper_profile_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+		error->message = "shaper profile not supported";
+		return -EINVAL;
+	}
+
+	/* shared shaper is not supported */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= vf->num_queue_pairs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error)
+{
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!is_leaf || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type == IAVF_TM_NODE_TYPE_QUEUE)
+		*is_leaf = true;
+	else
+		*is_leaf = false;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	enum iavf_tm_node_type parent_node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+	struct iavf_tm_node *parent_node;
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+	int ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = iavf_node_param_check(vf, node_id, priority, weight,
+				    params, error);
+	if (ret)
+		return ret;
+
+	/* check if the node already exists */
+	if (iavf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* root node if it has no parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != IAVF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (vf->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("iavf_tm_node",
+				      sizeof(struct iavf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		vf->tm_conf.root = tm_node;
+		return 0;
+	}
+
+	/* TC or queue node */
+	/* check the parent node */
+	parent_node = iavf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != IAVF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not root or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the node number */
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (vf->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the queue number */
+		if (parent_node->reference_count >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many queues";
+			return -EINVAL;
+		}
+		if (node_id >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large queue id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or queue node */
+	tm_node = rte_zmalloc("iavf_tm_node",
+			      sizeof(struct iavf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = vf->tm_conf.nb_tc_node;
+		vf->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.queue_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		vf->tm_conf.nb_queue_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == IAVF_TM_NODE_TYPE_PORT) {
+		rte_free(tm_node);
+		vf->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or queue node */
+	tm_node->parent->reference_count--;
+	if (node_type == IAVF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&vf->tm_conf.tc_list, tm_node, node);
+		vf->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&vf->tm_conf.queue_list, tm_node, node);
+		vf->tm_conf.nb_queue_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (tc_nb > vf->vf_res->num_queue_pairs)
+		return -EINVAL;
+
+	error->type = RTE_TM_ERROR_TYPE_NONE;
+
+	/* set all the parameters to 0 first. */
+	memset(cap, 0, sizeof(struct rte_tm_capabilities));
+
+	/**
+	 * support port + TCs + queues
+	 * this shows the max capability, not the current configuration.
+	 */
+	cap->n_nodes_max = 1 + IAVF_MAX_TRAFFIC_CLASS
+		+ vf->num_queue_pairs;
+	cap->n_levels_max = 3; /* port, TC, queue */
+	cap->non_leaf_nodes_identical = 1;
+	cap->leaf_nodes_identical = 1;
+	cap->shaper_n_max = cap->n_nodes_max;
+	cap->shaper_private_n_max = cap->n_nodes_max;
+	cap->shaper_private_dual_rate_n_max = 0;
+	cap->shaper_private_rate_min = 0;
+	/* KBps (link_speed is in Mbps) */
+	cap->shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->shaper_private_packet_mode_supported = 0;
+	cap->shaper_private_byte_mode_supported = 1;
+	cap->shaper_shared_n_max = 0;
+	cap->shaper_shared_n_nodes_per_shaper_max = 0;
+	cap->shaper_shared_n_shapers_per_node_max = 0;
+	cap->shaper_shared_dual_rate_n_max = 0;
+	cap->shaper_shared_rate_min = 0;
+	cap->shaper_shared_rate_max = 0;
+	cap->shaper_shared_packet_mode_supported = 0;
+	cap->shaper_shared_byte_mode_supported = 0;
+	cap->sched_n_children_max = vf->num_queue_pairs;
+	cap->sched_sp_n_priorities_max = 1;
+	cap->sched_wfq_n_children_per_group_max = 0;
+	cap->sched_wfq_n_groups_max = 0;
+	cap->sched_wfq_weight_max = 1;
+	cap->sched_wfq_packet_mode_supported = 0;
+	cap->sched_wfq_byte_mode_supported = 0;
+	cap->cman_head_drop_supported = 0;
+	cap->dynamic_update_mask = 0;
+	cap->shaper_pkt_length_adjust_min = RTE_TM_ETH_FRAMING_OVERHEAD;
+	cap->shaper_pkt_length_adjust_max = RTE_TM_ETH_FRAMING_OVERHEAD_FCS;
+	cap->cman_wred_context_n_max = 0;
+	cap->cman_wred_context_private_n_max = 0;
+	cap->cman_wred_context_shared_n_max = 0;
+	cap->cman_wred_context_shared_n_nodes_per_context_max = 0;
+	cap->cman_wred_context_shared_n_contexts_per_node_max = 0;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (level_id >= IAVF_TM_NODE_TYPE_MAX) {
+		error->type = RTE_TM_ERROR_TYPE_LEVEL_ID;
+		error->message = "too deep level";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (level_id == IAVF_TM_NODE_TYPE_PORT) {
+		cap->n_nodes_max = 1;
+		cap->n_nodes_nonleaf_max = 1;
+		cap->n_nodes_leaf_max = 0;
+	} else if (level_id == IAVF_TM_NODE_TYPE_TC) {
+		/* TC */
+		cap->n_nodes_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_nonleaf_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_leaf_max = 0;
+	} else {
+		/* queue */
+		cap->n_nodes_max = vf->num_queue_pairs;
+		cap->n_nodes_nonleaf_max = 0;
+		cap->n_nodes_leaf_max = vf->num_queue_pairs;
+	}
+
+	cap->non_leaf_nodes_identical = true;
+	cap->leaf_nodes_identical = true;
+
+	if (level_id != IAVF_TM_NODE_TYPE_QUEUE) {
+		cap->nonleaf.shaper_private_supported = true;
+		cap->nonleaf.shaper_private_dual_rate_supported = false;
+		cap->nonleaf.shaper_private_rate_min = 0;
+		/* KBps (link_speed is in Mbps) */
+		cap->nonleaf.shaper_private_rate_max =
+			vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+		cap->nonleaf.shaper_private_packet_mode_supported = 0;
+		cap->nonleaf.shaper_private_byte_mode_supported = 1;
+		cap->nonleaf.shaper_shared_n_max = 0;
+		cap->nonleaf.shaper_shared_packet_mode_supported = 0;
+		cap->nonleaf.shaper_shared_byte_mode_supported = 0;
+		if (level_id == IAVF_TM_NODE_TYPE_PORT)
+			cap->nonleaf.sched_n_children_max =
+				IAVF_MAX_TRAFFIC_CLASS;
+		else
+			cap->nonleaf.sched_n_children_max =
+				vf->num_queue_pairs;
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
+		cap->nonleaf.sched_wfq_n_groups_max = 0;
+		cap->nonleaf.sched_wfq_weight_max = 1;
+		cap->nonleaf.sched_wfq_packet_mode_supported = 0;
+		cap->nonleaf.sched_wfq_byte_mode_supported = 0;
+		cap->nonleaf.stats_mask = 0;
+
+		return 0;
+	}
+
+	/* queue node */
+	cap->leaf.shaper_private_supported = false;
+	cap->leaf.shaper_private_dual_rate_supported = false;
+	cap->leaf.shaper_private_rate_min = 0;
+	/* KBps (link_speed is in Mbps) */
+	cap->leaf.shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->leaf.shaper_private_packet_mode_supported = 0;
+	cap->leaf.shaper_private_byte_mode_supported = 1;
+	cap->leaf.shaper_shared_n_max = 0;
+	cap->leaf.shaper_shared_packet_mode_supported = 0;
+	cap->leaf.shaper_shared_byte_mode_supported = 0;
+	cap->leaf.cman_head_drop_supported = false;
+	cap->leaf.cman_wred_context_private_supported = true;
+	cap->leaf.cman_wred_context_shared_n_max = 0;
+	cap->leaf.stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_node_capabilities_get(struct rte_eth_dev *dev,
+			   uint32_t node_id,
+			   struct rte_tm_node_capabilities *cap,
+			   struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type;
+	struct virtchnl_qos_cap_elem tc_cap;
+	struct iavf_tm_node *tm_node;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "not support capability get";
+		return -EINVAL;
+	}
+
+	tc_cap = vf->qos_cap->cap[tm_node->tc];
+	if (tc_cap.tc_num != tm_node->tc) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "tc not match";
+		return -EINVAL;
+	}
+
+	cap->shaper_private_supported = true;
+	cap->shaper_private_dual_rate_supported = false;
+	cap->shaper_private_rate_min = tc_cap.shaper.committed;
+	cap->shaper_private_rate_max = tc_cap.shaper.peak;
+	cap->shaper_shared_n_max = 0;
+	cap->nonleaf.sched_n_children_max = vf->num_queue_pairs;
+	cap->nonleaf.sched_sp_n_priorities_max = 1;
+	cap->nonleaf.sched_wfq_n_children_per_group_max = 1;
+	cap->nonleaf.sched_wfq_n_groups_max = 0;
+	cap->nonleaf.sched_wfq_weight_max = tc_cap.weight;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
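+/*
+ * Build the VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP message from the committed
+ * hierarchy: count the queue nodes under each TC, then give each TC a
+ * contiguous range of queue ids starting from 0.
+ */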
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_adapter *adapter =
+		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct virtchnl_queue_tc_mapping *q_tc_mapping;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+	uint16_t size;
+	int index = 0, node_committed = 0;
+	int i, ret_val = IAVF_SUCCESS;
+
+	/* check if port is stopped */
+	if (adapter->stopped != 1) {
+		PMD_DRV_LOG(ERR, "Please stop port first");
+		ret_val = IAVF_ERR_NOT_READY;
+		goto err;
+	}
+
+	/* check if all TC nodes are set with VF vsi */
+	if (vf->tm_conf.nb_tc_node != vf->qos_cap->num_elem) {
+		PMD_DRV_LOG(ERR, "Does not set VF vsi nodes to all TCs");
+		ret_val = IAVF_ERR_PARAM;
+		goto fail_clear;
+	}
+
+	size = sizeof(*q_tc_mapping) + sizeof(q_tc_mapping->tc[0]) *
+		(vf->qos_cap->num_elem - 1);
+	q_tc_mapping = rte_zmalloc("q_tc", size, 0);
+	if (!q_tc_mapping) {
+		ret_val = IAVF_ERR_NO_MEMORY;
+		goto fail_clear;
+	}
+
+	q_tc_mapping->vsi_id = vf->vsi.vsi_id;
+	q_tc_mapping->num_tc = vf->qos_cap->num_elem;
+	q_tc_mapping->num_queue_pairs = vf->num_queue_pairs;
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		if (tm_node->tc >= q_tc_mapping->num_tc) {
+			PMD_DRV_LOG(ERR, "TC%d is not enabled", tm_node->tc);
+			ret_val = IAVF_ERR_PARAM;
+			goto fail_clear;
+		}
+		q_tc_mapping->tc[tm_node->tc].req.queue_count++;
+		node_committed++;
+	}
+
+	/* All queues allocated to this VF should be mapped */
+	if (node_committed < vf->num_queue_pairs) {
+		PMD_DRV_LOG(ERR, "queue node is less than allocated queue pairs");
+		ret_val = IAVF_ERR_PARAM;
+		goto fail_clear;
+	}
+
+	for (i = 0; i < q_tc_mapping->num_tc; i++) {
+		q_tc_mapping->tc[i].req.start_queue_id = index;
+		index += q_tc_mapping->tc[i].req.queue_count;
+	}
+
+	ret_val = iavf_set_q_tc_map(dev, q_tc_mapping, size);
+	if (ret_val)
+		goto fail_clear;
+
+	vf->tm_conf.committed = true;
+	return ret_val;
+
+fail_clear:
+	/* clear all the traffic manager configuration */
+	if (clear_on_fail) {
+		iavf_tm_conf_uninit(dev);
+		iavf_tm_conf_init(dev);
+	}
+err:
+	return ret_val;
+}
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index 02e828f9b7..06dc663947 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -467,7 +467,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 		VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 		VIRTCHNL_VF_OFFLOAD_CRC |
 		VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-		VIRTCHNL_VF_LARGE_NUM_QPAIRS;
+		VIRTCHNL_VF_LARGE_NUM_QPAIRS |
+		VIRTCHNL_VF_OFFLOAD_QOS;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -1550,6 +1551,59 @@ iavf_set_hena(struct iavf_adapter *adapter, uint64_t hena)
 	return err;
 }
 
+int
+iavf_get_qos_cap(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	uint32_t len;
+	int err;
+
+	args.ops = VIRTCHNL_OP_GET_QOS_CAPS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+	err = iavf_execute_vf_cmd(adapter, &args);
+
+	if (err) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_GET_VF_RESOURCE");
+		return -1;
+	}
+
+	len = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+
+	rte_memcpy(vf->qos_cap, args.out_buffer,
+		   RTE_MIN(args.out_size, len));
+
+	return 0;
+}
+
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+		struct virtchnl_queue_tc_mapping *q_tc_mapping, uint16_t size)
+{
+	struct iavf_adapter *adapter =
+			IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_cmd_info args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.ops = VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP;
+	args.in_args = (uint8_t *)q_tc_mapping;
+	args.in_args_size = size;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	err = iavf_execute_vf_cmd(adapter, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "Failed to execute command of"
+			    " VIRTCHNL_OP_CONFIG_TC_MAP");
+	return err;
+}
+
 int
 iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			struct rte_ether_addr *mc_addrs,
diff --git a/drivers/net/iavf/meson.build b/drivers/net/iavf/meson.build
index 6f222a9e87..f2010a8337 100644
--- a/drivers/net/iavf/meson.build
+++ b/drivers/net/iavf/meson.build
@@ -19,6 +19,7 @@ sources = files(
         'iavf_generic_flow.c',
         'iavf_fdir.c',
         'iavf_hash.c',
+        'iavf_tm.c',
 )
 
 if arch_subdir == 'x86'
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 6/7] net/iavf: check Tx packet with correct UP and queue
  2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (4 preceding siblings ...)
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 5/7] net/iavf: query QoS cap and set queue TC mapping Ting Xu
@ 2021-07-01 10:20   ` Ting Xu
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 7/7] doc: release note for ETS-based Tx QoS Ting Xu
  2021-07-02  3:00   ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Zhang, Qi Z
  7 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 10:20 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

Add a check in the Tx packet preparation function to guarantee that a
packet with a specific user priority is distributed to the correct Tx
queue, according to the configured Tx queue TC mapping. A sketch of
the underlying check follows the '---' marker below.

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
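Note (illustration, not from the patch): the check reduces to taking
the PCP field, i.e. the top 3 bits of the VLAN TCI, and testing it
against the tc_prio bitmap the PF reported for the queue's TC. A
standalone sketch with made-up values:

  #include <stdint.h>
  #include <stdio.h>

  #define VLAN_TAG_PCP_OFFSET 13  /* PCP lives in TCI bits 15:13 */

  int
  main(void)
  {
      uint16_t vlan_tci = 0x6001;  /* PCP = 3, VID = 1 */
      uint8_t up = vlan_tci >> VLAN_TAG_PCP_OFFSET;
      uint8_t tc_prio = 0x08;      /* assume the TC allows only UP 3 */

      if (tc_prio & (1u << up))
          printf("UP %u may be sent on this queue's TC\n", up);
      else
          printf("UP %u is rejected for this TC\n", up);
      return 0;
  }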
 drivers/net/iavf/iavf.h      | 10 +++++++++
 drivers/net/iavf/iavf_rxtx.c | 43 ++++++++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_tm.c   | 13 +++++++++++
 3 files changed, 66 insertions(+)

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index feb8337b55..b3bd078111 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -86,6 +86,8 @@
 
 #define IAVF_BITS_PER_BYTE 8
 
+#define IAVF_VLAN_TAG_PCP_OFFSET 13
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -165,6 +167,13 @@ struct iavf_tm_conf {
 	bool committed;
 };
 
+/* Struct to store queue TC mapping. Queues are contiguous in one TC */
+struct iavf_qtc_map {
+	uint8_t	tc;
+	uint16_t start_queue_id;
+	uint16_t queue_count;
+};
+
 /* Structure to store private data specific for VF instance. */
 struct iavf_info {
 	uint16_t num_queue_pairs;
@@ -213,6 +222,7 @@ struct iavf_info {
 	bool lv_enabled;
 
 	struct virtchnl_qos_cap_list *qos_cap;
+	struct iavf_qtc_map *qtc_map;
 	struct iavf_tm_conf tm_conf;
 };
 
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 0361af0d85..eb6d83a165 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -2342,14 +2342,49 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	return nb_tx;
 }
 
+/* Check if the packet with vlan user priority is transmitted on the
+ * correct queue.
+ */
+static int
+iavf_check_vlan_up2tc(struct iavf_tx_queue *txq, uint8_t tc, struct rte_mbuf *m)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	uint16_t up;
+
+	up = m->vlan_tci >> IAVF_VLAN_TAG_PCP_OFFSET;
+
+	if (!(vf->qos_cap->cap[tc].tc_prio & BIT(up))) {
+		PMD_TX_LOG(ERR, "packet with vlan pcp %u cannot transmit in queue %u\n",
+			up, txq->queue_id);
+		return -1;
+	} else {
+		return 0;
+	}
+}
+
 /* TX prep functions */
 uint16_t
 iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 	      uint16_t nb_pkts)
 {
 	int i, ret;
+	uint8_t tc = 0;
 	uint64_t ol_flags;
 	struct rte_mbuf *m;
+	struct iavf_tx_queue *txq = tx_queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	if (vf->tm_conf.committed) {
+		for (i = 0; i < vf->qos_cap->num_elem; i++) {
+			if (txq->queue_id >= vf->qtc_map[i].start_queue_id &&
+				txq->queue_id < (vf->qtc_map[i].start_queue_id +
+				vf->qtc_map[i].queue_count))
+				break;
+		}
+		tc = i;
+	}
 
 	for (i = 0; i < nb_pkts; i++) {
 		m = tx_pkts[i];
@@ -2385,6 +2420,14 @@ iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 			rte_errno = -ret;
 			return i;
 		}
+
+		if (ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN)) {
+			ret = iavf_check_vlan_up2tc(txq, tc, m);
+			if (ret != 0) {
+				rte_errno = -ret;
+				return i;
+			}
+		}
 	}
 
 	return i;
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
index 79b6bcb79d..e36c4a5528 100644
--- a/drivers/net/iavf/iavf_tm.c
+++ b/drivers/net/iavf/iavf_tm.c
@@ -655,6 +655,7 @@ static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 	struct virtchnl_queue_tc_mapping *q_tc_mapping;
 	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
 	struct iavf_tm_node *tm_node;
+	struct iavf_qtc_map *qtc_map;
 	uint16_t size;
 	int index = 0, node_committed = 0;
 	int i, ret_val = IAVF_SUCCESS;
@@ -684,6 +685,7 @@ static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 	q_tc_mapping->vsi_id = vf->vsi.vsi_id;
 	q_tc_mapping->num_tc = vf->qos_cap->num_elem;
 	q_tc_mapping->num_queue_pairs = vf->num_queue_pairs;
+
 	TAILQ_FOREACH(tm_node, queue_list, node) {
 		if (tm_node->tc >= q_tc_mapping->num_tc) {
 			PMD_DRV_LOG(ERR, "TC%d is not enabled", tm_node->tc);
@@ -701,15 +703,26 @@ static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 		goto fail_clear;
 	}
 
+	/* store the queue TC mapping info */
+	qtc_map = rte_zmalloc("qtc_map",
+		sizeof(struct iavf_qtc_map) * q_tc_mapping->num_tc, 0);
+	if (!qtc_map)
+		return IAVF_ERR_NO_MEMORY;
+
 	for (i = 0; i < q_tc_mapping->num_tc; i++) {
 		q_tc_mapping->tc[i].req.start_queue_id = index;
 		index += q_tc_mapping->tc[i].req.queue_count;
+		qtc_map[i].tc = i;
+		qtc_map[i].start_queue_id =
+			q_tc_mapping->tc[i].req.start_queue_id;
+		qtc_map[i].queue_count = q_tc_mapping->tc[i].req.queue_count;
 	}
 
 	ret_val = iavf_set_q_tc_map(dev, q_tc_mapping, size);
 	if (ret_val)
 		goto fail_clear;
 
+	vf->qtc_map = qtc_map;
 	vf->tm_conf.committed = true;
 	return ret_val;
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 7/7] doc: release note for ETS-based Tx QoS
  2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (5 preceding siblings ...)
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 6/7] net/iavf: check Tx packet with correct UP and queue Ting Xu
@ 2021-07-01 10:20   ` Ting Xu
  2021-07-02  3:00   ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Zhang, Qi Z
  7 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 10:20 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

Add 21.08 release note for ETS-based Tx QoS

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 doc/guides/rel_notes/release_21_08.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index a6ecfdf3ce..35aa76a270 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -55,6 +55,13 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Updated Intel iavf driver.**
+
+  * Added Tx QoS VF queue TC mapping.
+
+* **Updated Intel ice driver.**
+
+  * Added Tx QoS TC bandwidth configuration in DCF.
 
 Removed Items
 -------------
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF
  2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                   ` (8 preceding siblings ...)
  2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
@ 2021-07-01 11:41 ` Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
                     ` (6 more replies)
  9 siblings, 7 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 11:41 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch enables the ETS-based Tx QoS for IAVF. Kernel tool is used to
configure ETS first. DCF is used to set bandwidth limit for VFs of each
TC. IAVF is supported to query QoS capability and set queue TC mapping.
Traffic Management API is utilized to configure the QoS hierarchy
scheduler tree. The scheduler tree will be passed to hardware to enable
all above functions.
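
As a rough sketch of the DCF side (not part of the series itself), the
same rte_tm API drives the port -> TC -> per-VF VSI hierarchy on the
DCF port; the VF/TC counts, node ids and the 100 Mbps cap below are
made-up values for illustration:

  #include <string.h>
  #include <rte_tm.h>

  static int
  dcf_tm_setup(uint16_t dcf_port)  /* assumes 2 VFs, 2 enabled TCs */
  {
      struct rte_tm_shaper_params sp;
      struct rte_tm_node_params np;
      struct rte_tm_error err;

      memset(&sp, 0, sizeof(sp));
      sp.peak.rate = 100 * 1000 * 1000 / 8;  /* 100 Mbps in bytes/sec */
      rte_tm_shaper_profile_add(dcf_port, 1, &sp, &err);

      memset(&np, 0, sizeof(np));
      np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
      np.nonleaf.n_sp_priorities = 1;
      rte_tm_node_add(dcf_port, 100, RTE_TM_NODE_ID_NULL,
                      0, 1, 0, &np, &err);                    /* port */
      rte_tm_node_add(dcf_port, 90, 100, 0, 1, 1, &np, &err); /* TC0 */
      rte_tm_node_add(dcf_port, 91, 100, 0, 1, 1, &np, &err); /* TC1 */

      /* VSI (leaf) nodes; node id encodes tc * num_vfs + vf_id */
      memset(&np, 0, sizeof(np));
      np.shaper_profile_id = 1;  /* cap each VF at 100 Mbps per TC */
      np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
      rte_tm_node_add(dcf_port, 0, 90, 0, 1, 2, &np, &err); /* TC0/VF0 */
      rte_tm_node_add(dcf_port, 1, 90, 0, 1, 2, &np, &err); /* TC0/VF1 */
      rte_tm_node_add(dcf_port, 2, 91, 0, 1, 2, &np, &err); /* TC1/VF0 */
      rte_tm_node_add(dcf_port, 3, 91, 0, 1, 2, &np, &err); /* TC1/VF1 */

      return rte_tm_hierarchy_commit(dcf_port, 1, &err);
  }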

Ting Xu (7):
  common/iavf: support ETS-based QoS offload configuration
  net/ice/base: support DCF query port ETS adminq
  net/ice: support DCF link status event handling
  net/ice: support QoS config VF bandwidth in DCF
  net/iavf: query QoS cap and set queue TC mapping
  net/iavf: check Tx packet with correct UP and queue
  doc: release note for ETS-based Tx QoS

 doc/guides/rel_notes/release_21_08.rst |   7 +
 drivers/common/iavf/iavf_type.h        |   2 +
 drivers/common/iavf/virtchnl.h         | 131 +++++
 drivers/net/iavf/iavf.h                |  56 ++
 drivers/net/iavf/iavf_ethdev.c         |  37 ++
 drivers/net/iavf/iavf_rxtx.c           |  49 ++
 drivers/net/iavf/iavf_rxtx.h           |   1 +
 drivers/net/iavf/iavf_tm.c             | 743 ++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c          |  56 +-
 drivers/net/iavf/meson.build           |   1 +
 drivers/net/ice/base/ice_dcb.c         |   3 +-
 drivers/net/ice/ice_dcf.c              |   9 +-
 drivers/net/ice/ice_dcf.h              |  54 ++
 drivers/net/ice/ice_dcf_ethdev.c       |  68 ++-
 drivers/net/ice/ice_dcf_ethdev.h       |   3 +
 drivers/net/ice/ice_dcf_parent.c       |  83 +++
 drivers/net/ice/ice_dcf_sched.c        | 765 +++++++++++++++++++++++++
 drivers/net/ice/meson.build            |   3 +-
 18 files changed, 2064 insertions(+), 7 deletions(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 1/7] common/iavf: support ETS-based QoS offload configuration
  2021-07-01 11:41 ` Ting Xu
@ 2021-07-01 11:41   ` Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 2/7] net/ice/base: support DCF query port ETS adminq Ting Xu
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 11:41 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch adds new virtchnl opcodes and structures for QoS
configuration, which include:
1. VIRTCHNL_VF_OFFLOAD_QOS, to negotiate the capability supporting QoS
configuration. If both VF and PF have this flag, the ETS-based QoS
offload function is supported.
2. VIRTCHNL_OP_DCF_CONFIG_BW, with which DCF is supposed to configure
min and max bandwidth for each VF per enabled TC. To make the VSI node
bandwidth configuration work, DCF also needs to configure TC node
bandwidth directly.
3. VIRTCHNL_OP_GET_QOS_CAPS, with which a VF queries its current QoS
configuration, such as enabled TCs, arbiter type, up2tc and the
bandwidth of the VSI node. The configuration is previously set by DCB
and DCF, and is now the potential QoS capability of the VF. The VF can
take it as a reference when configuring the queue TC mapping.
4. VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP, to set the VF queue-to-TC mapping
for all Tx and Rx queues. Queues mapping to one TC should be
contiguous, and all allocated queues should be mapped.
A usage sketch for these structures follows the '---' marker below.

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
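Usage sketch (not from the patch): how the cfg list might be filled in
for one VF with two enabled TCs; the 10/50 Mbps rates are made-up
values (virtchnl_shaper_bw rates are in Kbps):

  #include <stdint.h>
  #include <stdlib.h>

  /* The header already holds one cfg entry, so allocate num_tc - 1
   * extra ones, matching how the PMD sizes this message.
   */
  static struct virtchnl_dcf_bw_cfg_list *
  build_vf_bw_msg(uint16_t vf_id, uint8_t num_tc, uint16_t *len)
  {
      struct virtchnl_dcf_bw_cfg_list *bw;
      int i;

      *len = sizeof(*bw) +
             sizeof(struct virtchnl_dcf_bw_cfg) * (num_tc - 1);
      bw = calloc(1, *len);
      if (!bw)
          return NULL;

      bw->vf_id = vf_id;
      bw->node_type = VIRTCHNL_DCF_TARGET_VF_BW;
      bw->num_elem = num_tc;
      for (i = 0; i < num_tc; i++) {
          bw->cfg[i].tc_num = i;
          bw->cfg[i].type = VIRTCHNL_BW_SHAPER;
          bw->cfg[i].bw_type = VIRTCHNL_DCF_BW_CIR | VIRTCHNL_DCF_BW_PIR;
          bw->cfg[i].shaper.committed = 10000;  /* 10 Mbps guaranteed */
          bw->cfg[i].shaper.peak = 50000;       /* 50 Mbps ceiling */
      }
      /* 'bw' and '*len' then form the VIRTCHNL_OP_DCF_CONFIG_BW payload */
      return bw;
  }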
 drivers/common/iavf/iavf_type.h |   2 +
 drivers/common/iavf/virtchnl.h  | 131 ++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)

diff --git a/drivers/common/iavf/iavf_type.h b/drivers/common/iavf/iavf_type.h
index f3815d523b..73dfb47e70 100644
--- a/drivers/common/iavf/iavf_type.h
+++ b/drivers/common/iavf/iavf_type.h
@@ -141,6 +141,8 @@ enum iavf_debug_mask {
 #define IAVF_PHY_LED_MODE_MASK			0xFFFF
 #define IAVF_PHY_LED_MODE_ORIG			0x80000000
 
+#define IAVF_MAX_TRAFFIC_CLASS	8
+
 /* Memory types */
 enum iavf_memset_type {
 	IAVF_NONDMA_MEM = 0,
diff --git a/drivers/common/iavf/virtchnl.h b/drivers/common/iavf/virtchnl.h
index 197edce8a1..1cf0866124 100644
--- a/drivers/common/iavf/virtchnl.h
+++ b/drivers/common/iavf/virtchnl.h
@@ -85,6 +85,10 @@ enum virtchnl_rx_hsplit {
 	VIRTCHNL_RX_HSPLIT_SPLIT_SCTP    = 8,
 };
 
+enum virtchnl_bw_limit_type {
+	VIRTCHNL_BW_SHAPER = 0,
+};
+
 #define VIRTCHNL_ETH_LENGTH_OF_ADDRESS	6
 /* END GENERIC DEFINES */
 
@@ -130,6 +134,7 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
 	VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
 	/* opcodes 34, 35, 36, and 37 are reserved */
+	VIRTCHNL_OP_DCF_CONFIG_BW = 37,
 	VIRTCHNL_OP_DCF_VLAN_OFFLOAD = 38,
 	VIRTCHNL_OP_DCF_CMD_DESC = 39,
 	VIRTCHNL_OP_DCF_CMD_BUFF = 40,
@@ -152,6 +157,8 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57,
 	VIRTCHNL_OP_ENABLE_VLAN_FILTERING_V2 = 58,
 	VIRTCHNL_OP_DISABLE_VLAN_FILTERING_V2 = 59,
+	VIRTCHNL_OP_GET_QOS_CAPS = 66,
+	VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP = 67,
 	VIRTCHNL_OP_ENABLE_QUEUES_V2 = 107,
 	VIRTCHNL_OP_DISABLE_QUEUES_V2 = 108,
 	VIRTCHNL_OP_MAP_QUEUE_VECTOR = 111,
@@ -398,6 +405,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC	BIT(26)
 #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF		BIT(27)
 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF		BIT(28)
+#define VIRTCHNL_VF_OFFLOAD_QOS		BIT(29)
 #define VIRTCHNL_VF_CAP_DCF			BIT(30)
 	/* BIT(31) is reserved */
 
@@ -1285,6 +1293,14 @@ struct virtchnl_filter {
 
 VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
 
+struct virtchnl_shaper_bw {
+	/* Unit is Kbps */
+	u32 committed;
+	u32 peak;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw);
+
 /* VIRTCHNL_OP_DCF_GET_VSI_MAP
  * VF sends this message to get VSI mapping table.
  * PF responds with an indirect message containing VF's
@@ -1357,6 +1373,37 @@ struct virtchnl_dcf_vlan_offload {
 
 VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_dcf_vlan_offload);
 
+struct virtchnl_dcf_bw_cfg {
+	u8 tc_num;
+#define VIRTCHNL_DCF_BW_CIR		BIT(0)
+#define VIRTCHNL_DCF_BW_PIR		BIT(1)
+	u8 bw_type;
+	u8 pad[2];
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_dcf_bw_cfg);
+
+/* VIRTCHNL_OP_DCF_CONFIG_BW
+ * VF sends this message to set the bandwidth configuration of each
+ * TC with a specific VF id. The flag node_type indicates whether this
+ * message configures VSI node or TC node bandwidth.
+ */
+struct virtchnl_dcf_bw_cfg_list {
+	u16 vf_id;
+	u8 num_elem;
+#define VIRTCHNL_DCF_TARGET_TC_BW	0
+#define VIRTCHNL_DCF_TARGET_VF_BW	1
+	u8 node_type;
+	struct virtchnl_dcf_bw_cfg cfg[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_dcf_bw_cfg_list);
+
 struct virtchnl_supported_rxdids {
 	/* see enum virtchnl_rx_desc_id_bitmasks */
 	u64 supported_rxdids;
@@ -1768,6 +1815,62 @@ struct virtchnl_fdir_del {
 
 VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
 
+/* VIRTCHNL_OP_GET_QOS_CAPS
+ * VF sends this message to get its QoS Caps, such as
+ * TC number, Arbiter and Bandwidth.
+ */
+struct virtchnl_qos_cap_elem {
+	u8 tc_num;
+	u8 tc_prio;
+#define VIRTCHNL_ABITER_STRICT      0
+#define VIRTCHNL_ABITER_ETS         2
+	u8 arbiter;
+#define VIRTCHNL_STRICT_WEIGHT      1
+	u8 weight;
+	enum virtchnl_bw_limit_type type;
+	union {
+		struct virtchnl_shaper_bw shaper;
+		u8 pad2[32];
+	};
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem);
+
+struct virtchnl_qos_cap_list {
+	u16 vsi_id;
+	u16 num_elem;
+	struct virtchnl_qos_cap_elem cap[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(44, virtchnl_qos_cap_list);
+
+/* VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP
+ * VF sends the virtchnl_queue_tc_mapping message to set the queue to
+ * TC mapping for all the Tx and Rx queues of a specified VSI, and
+ * gets a response with the bitmap of valid user priorities
+ * associated with the queues.
+ */
+struct virtchnl_queue_tc_mapping {
+	u16 vsi_id;
+	u16 num_tc;
+	u16 num_queue_pairs;
+	u8 pad[2];
+	union {
+		struct {
+			u16 start_queue_id;
+			u16 queue_count;
+		} req;
+		struct {
+#define VIRTCHNL_USER_PRIO_TYPE_UP	0
+#define VIRTCHNL_USER_PRIO_TYPE_DSCP	1
+			u16 prio_type;
+			u16 valid_prio_bitmap;
+		} resp;
+	} tc[1];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_tc_mapping);
+
 /* VIRTCHNL_OP_QUERY_FDIR_FILTER
  * VF sends this request to PF by filling out vsi_id,
  * flow_id and reset_counter. PF will return query_info
@@ -2118,6 +2221,19 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DCF_GET_VSI_MAP:
 	case VIRTCHNL_OP_DCF_GET_PKG_INFO:
 		break;
+	case VIRTCHNL_OP_DCF_CONFIG_BW:
+		valid_len = sizeof(struct virtchnl_dcf_bw_cfg_list);
+		if (msglen >= valid_len) {
+			struct virtchnl_dcf_bw_cfg_list *cfg_list =
+				(struct virtchnl_dcf_bw_cfg_list *)msg;
+			if (cfg_list->num_elem == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (cfg_list->num_elem - 1) *
+					 sizeof(struct virtchnl_dcf_bw_cfg);
+		}
+		break;
 	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
 		break;
 	case VIRTCHNL_OP_ADD_RSS_CFG:
@@ -2133,6 +2249,21 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_QUERY_FDIR_FILTER:
 		valid_len = sizeof(struct virtchnl_fdir_query);
 		break;
+	case VIRTCHNL_OP_GET_QOS_CAPS:
+		break;
+	case VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP:
+		valid_len = sizeof(struct virtchnl_queue_tc_mapping);
+		if (msglen >= valid_len) {
+			struct virtchnl_queue_tc_mapping *q_tc =
+				(struct virtchnl_queue_tc_mapping *)msg;
+			if (q_tc->num_tc == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += (q_tc->num_tc - 1) *
+					 sizeof(q_tc->tc[0]);
+		}
+		break;
 	case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
 		break;
 	case VIRTCHNL_OP_ADD_VLAN_V2:
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 2/7] net/ice/base: support DCF query port ETS adminq
  2021-07-01 11:41 ` Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
@ 2021-07-01 11:41   ` Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 3/7] net/ice: support DCF link status event handling Ting Xu
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 11:41 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

In the adminq command query port ETS function, the root node teid is
needed. However, for DCF, the root node is not initialized, which
causes an error when the variable is dereferenced. In this patch, we
first check whether the root node is available.

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/base/ice_dcb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ice/base/ice_dcb.c b/drivers/net/ice/base/ice_dcb.c
index c73fc095ff..9c9675f6ef 100644
--- a/drivers/net/ice/base/ice_dcb.c
+++ b/drivers/net/ice/base/ice_dcb.c
@@ -1524,7 +1524,8 @@ ice_aq_query_port_ets(struct ice_port_info *pi,
 		return ICE_ERR_PARAM;
 	cmd = &desc.params.port_ets;
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
-	cmd->port_teid = pi->root->info.node_teid;
+	if (pi->root)
+		cmd->port_teid = pi->root->info.node_teid;
 
 	status = ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
 	return status;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 3/7] net/ice: support DCF link status event handling
  2021-07-01 11:41 ` Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 2/7] net/ice/base: support DCF query port ETS adminq Ting Xu
@ 2021-07-01 11:41   ` Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 4/7] net/ice: support QoS config VF bandwidth in DCF Ting Xu
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 11:41 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

When the link status changes, the DCF receives a virtchnl PF event
message. Add support to handle this event, change the link status and
update the link info.
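
On the application side, these updates surface through the standard
ethdev LSC callback. A minimal sketch (editorial, not part of this
patch; it assumes the port is configured with LSC interrupts enabled):

	static int
	lsc_event_cb(uint16_t port_id, enum rte_eth_event_type event,
		     void *cb_arg, void *ret_param)
	{
		struct rte_eth_link link;

		RTE_SET_USED(event);
		RTE_SET_USED(cb_arg);
		RTE_SET_USED(ret_param);

		rte_eth_link_get_nowait(port_id, &link);
		printf("port %u link %s, speed %u Mbps\n", port_id,
		       link.link_status ? "up" : "down", link.link_speed);
		return 0;
	}

	/* register before rte_eth_dev_start() */
	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
				      lsc_event_cb, NULL);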

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.h        |  6 ++++
 drivers/net/ice/ice_dcf_ethdev.c | 54 ++++++++++++++++++++++++++++++--
 drivers/net/ice/ice_dcf_parent.c | 51 ++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 0cb90b5e9f..587093b909 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -60,6 +60,10 @@ struct ice_dcf_hw {
 	uint16_t nb_msix;
 	uint16_t rxq_map[16];
 	struct virtchnl_eth_stats eth_stats_offset;
+
+	/* Link status */
+	bool link_up;
+	uint32_t link_speed;
 };
 
 int ice_dcf_execute_virtchnl_cmd(struct ice_dcf_hw *hw,
@@ -77,5 +81,7 @@ int ice_dcf_disable_queues(struct ice_dcf_hw *hw);
 int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 			struct virtchnl_eth_stats *pstats);
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
+int ice_dcf_link_update(struct rte_eth_dev *dev,
+		    __rte_unused int wait_to_complete);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index f73dc80bd9..0b40ebbec6 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -881,11 +881,59 @@ ice_dcf_dev_close(struct rte_eth_dev *dev)
 	return 0;
 }
 
-static int
-ice_dcf_link_update(__rte_unused struct rte_eth_dev *dev,
+int
+ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete)
 {
-	return 0;
+	struct ice_dcf_adapter *ad = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &ad->real_hw;
+	struct rte_eth_link new_link;
+
+	memset(&new_link, 0, sizeof(new_link));
+
+	/* Only read the status info stored in the VF; the info is updated
+	 * when receiving a LINK_CHANGE event from the PF via virtchnl.
+	 */
+	switch (hw->link_speed) {
+	case 10:
+		new_link.link_speed = ETH_SPEED_NUM_10M;
+		break;
+	case 100:
+		new_link.link_speed = ETH_SPEED_NUM_100M;
+		break;
+	case 1000:
+		new_link.link_speed = ETH_SPEED_NUM_1G;
+		break;
+	case 10000:
+		new_link.link_speed = ETH_SPEED_NUM_10G;
+		break;
+	case 20000:
+		new_link.link_speed = ETH_SPEED_NUM_20G;
+		break;
+	case 25000:
+		new_link.link_speed = ETH_SPEED_NUM_25G;
+		break;
+	case 40000:
+		new_link.link_speed = ETH_SPEED_NUM_40G;
+		break;
+	case 50000:
+		new_link.link_speed = ETH_SPEED_NUM_50G;
+		break;
+	case 100000:
+		new_link.link_speed = ETH_SPEED_NUM_100G;
+		break;
+	default:
+		new_link.link_speed = ETH_SPEED_NUM_NONE;
+		break;
+	}
+
+	new_link.link_duplex = ETH_LINK_FULL_DUPLEX;
+	new_link.link_status = hw->link_up ? ETH_LINK_UP :
+					     ETH_LINK_DOWN;
+	new_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
+				ETH_LINK_SPEED_FIXED);
+
+	return rte_eth_linkstatus_set(dev, &new_link);
 }
 
 /* Add UDP tunneling port */
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 19420a0f58..788f6dd2a0 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -178,6 +178,44 @@ start_vsi_reset_thread(struct ice_dcf_hw *dcf_hw, bool vfr, uint16_t vf_id)
 	}
 }
 
+static uint32_t
+ice_dcf_convert_link_speed(enum virtchnl_link_speed virt_link_speed)
+{
+	uint32_t speed;
+
+	switch (virt_link_speed) {
+	case VIRTCHNL_LINK_SPEED_100MB:
+		speed = 100;
+		break;
+	case VIRTCHNL_LINK_SPEED_1GB:
+		speed = 1000;
+		break;
+	case VIRTCHNL_LINK_SPEED_10GB:
+		speed = 10000;
+		break;
+	case VIRTCHNL_LINK_SPEED_40GB:
+		speed = 40000;
+		break;
+	case VIRTCHNL_LINK_SPEED_20GB:
+		speed = 20000;
+		break;
+	case VIRTCHNL_LINK_SPEED_25GB:
+		speed = 25000;
+		break;
+	case VIRTCHNL_LINK_SPEED_2_5GB:
+		speed = 2500;
+		break;
+	case VIRTCHNL_LINK_SPEED_5GB:
+		speed = 5000;
+		break;
+	default:
+		speed = 0;
+		break;
+	}
+
+	return speed;
+}
+
 void
 ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 			    uint8_t *msg, uint16_t msglen)
@@ -196,6 +234,19 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 		break;
 	case VIRTCHNL_EVENT_LINK_CHANGE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event");
+		dcf_hw->link_up = pf_msg->event_data.link_event.link_status;
+		if (dcf_hw->vf_res->vf_cap_flags &
+			VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+			dcf_hw->link_speed =
+				pf_msg->event_data.link_event_adv.link_speed;
+		} else {
+			enum virtchnl_link_speed speed;
+			speed = pf_msg->event_data.link_event.link_speed;
+			dcf_hw->link_speed = ice_dcf_convert_link_speed(speed);
+		}
+		ice_dcf_link_update(dcf_hw->eth_dev, 0);
+		rte_eth_dev_callback_process(dcf_hw->eth_dev,
+			RTE_ETH_EVENT_INTR_LSC, NULL);
 		break;
 	case VIRTCHNL_EVENT_PF_DRIVER_CLOSE:
 		PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event");
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 4/7] net/ice: support QoS config VF bandwidth in DCF
  2021-07-01 11:41 ` Ting Xu
                     ` (2 preceding siblings ...)
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 3/7] net/ice: support DCF link status event handling Ting Xu
@ 2021-07-01 11:41   ` Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 5/7] net/iavf: query QoS cap and set queue TC mapping Ting Xu
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 11:41 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch supports the ETS-based QoS configuration. It enables the DCF
to configure bandwidth limits for each VF VSI across the different TCs.
A hierarchical scheduler tree is built with port, TC and VSI nodes.
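
For reference, an application on the DCF port would drive this through
the generic rte_tm API roughly as below (an editorial sketch, not part
of this patch; the node ids and the rate are arbitrary examples):

	struct rte_tm_error err;
	struct rte_tm_shaper_params sp = { 0 };
	struct rte_tm_node_params np = { 0 };

	/* 100 Mbps peak; rte_tm shaper rates are in bytes/second */
	sp.peak.rate = 100 * 1000 * 1000 / 8;
	rte_tm_shaper_profile_add(port_id, 1, &sp, &err);

	/* non-leaf nodes: port root (level 0) and TC0 (level 1) */
	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
	np.nonleaf.n_sp_priorities = 1;
	rte_tm_node_add(port_id, 1000, RTE_TM_NODE_ID_NULL,
			0, 1, 0, &np, &err);
	rte_tm_node_add(port_id, 900, 1000, 0, 1, 1, &np, &err);

	/* VF0 VSI node (level 2) carries the shaper profile */
	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = 1;
	np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
	rte_tm_node_add(port_id, 0, 900, 0, 1, 2, &np, &err);

	/* triggers VIRTCHNL_OP_DCF_CONFIG_BW messages to the PF */
	rte_tm_hierarchy_commit(port_id, 1 /* clear_on_fail */, &err);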

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/ice/ice_dcf.c        |   9 +-
 drivers/net/ice/ice_dcf.h        |  48 ++
 drivers/net/ice/ice_dcf_ethdev.c |  14 +
 drivers/net/ice/ice_dcf_ethdev.h |   3 +
 drivers/net/ice/ice_dcf_parent.c |  32 ++
 drivers/net/ice/ice_dcf_sched.c  | 765 +++++++++++++++++++++++++++++++
 drivers/net/ice/meson.build      |   3 +-
 7 files changed, 872 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ice/ice_dcf_sched.c

diff --git a/drivers/net/ice/ice_dcf.c b/drivers/net/ice/ice_dcf.c
index d72a6f357e..349d23ee4f 100644
--- a/drivers/net/ice/ice_dcf.c
+++ b/drivers/net/ice/ice_dcf.c
@@ -235,7 +235,8 @@ ice_dcf_get_vf_resource(struct ice_dcf_hw *hw)
 	caps = VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | VIRTCHNL_VF_OFFLOAD_RX_POLLING |
 	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED | VIRTCHNL_VF_CAP_DCF |
 	       VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
+	       VF_BASE_MODE_OFFLOADS | VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC |
+	       VIRTCHNL_VF_OFFLOAD_QOS;
 
 	err = ice_dcf_send_cmd_req_no_irq(hw, VIRTCHNL_OP_GET_VF_RESOURCES,
 					  (uint8_t *)&caps, sizeof(caps));
@@ -668,6 +669,9 @@ ice_dcf_init_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 		}
 	}
 
+	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+		ice_dcf_tm_conf_init(eth_dev);
+
 	hw->eth_dev = eth_dev;
 	rte_intr_callback_register(&pci_dev->intr_handle,
 				   ice_dcf_dev_interrupt_handler, hw);
@@ -703,6 +707,9 @@ ice_dcf_uninit_hw(struct rte_eth_dev *eth_dev, struct ice_dcf_hw *hw)
 	ice_dcf_mode_disable(hw);
 	iavf_shutdown_adminq(&hw->avf);
 
+	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+		ice_dcf_tm_conf_uninit(eth_dev);
+
 	rte_free(hw->arq_buf);
 	rte_free(hw->vf_vsi_map);
 	rte_free(hw->vf_res);
diff --git a/drivers/net/ice/ice_dcf.h b/drivers/net/ice/ice_dcf.h
index 587093b909..1c7653de3d 100644
--- a/drivers/net/ice/ice_dcf.h
+++ b/drivers/net/ice/ice_dcf.h
@@ -6,6 +6,7 @@
 #define _ICE_DCF_H_
 
 #include <ethdev_driver.h>
+#include <rte_tm_driver.h>
 
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
@@ -30,6 +31,49 @@ struct dcf_virtchnl_cmd {
 	volatile int pending;
 };
 
+struct ice_dcf_tm_shaper_profile {
+	TAILQ_ENTRY(ice_dcf_tm_shaper_profile) node;
+	uint32_t shaper_profile_id;
+	uint32_t reference_count;
+	struct rte_tm_shaper_params profile;
+};
+
+TAILQ_HEAD(ice_dcf_shaper_profile_list, ice_dcf_tm_shaper_profile);
+
+/* Struct to store Traffic Manager node configuration. */
+struct ice_dcf_tm_node {
+	TAILQ_ENTRY(ice_dcf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct ice_dcf_tm_node *parent;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(ice_dcf_tm_node_list, ice_dcf_tm_node);
+
+/* node type of Traffic Manager */
+enum ice_dcf_tm_node_type {
+	ICE_DCF_TM_NODE_TYPE_PORT,
+	ICE_DCF_TM_NODE_TYPE_TC,
+	ICE_DCF_TM_NODE_TYPE_VSI,
+	ICE_DCF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct ice_dcf_tm_conf {
+	struct ice_dcf_shaper_profile_list shaper_profile_list;
+	struct ice_dcf_tm_node *root; /* root node - port */
+	struct ice_dcf_tm_node_list tc_list; /* node list for all the TCs */
+	struct ice_dcf_tm_node_list vsi_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_vsi_node;
+	bool committed;
+};
+
 struct ice_dcf_hw {
 	struct iavf_hw avf;
 
@@ -45,6 +89,8 @@ struct ice_dcf_hw {
 	uint16_t *vf_vsi_map;
 	uint16_t pf_vsi_id;
 
+	struct ice_dcf_tm_conf tm_conf;
+	struct ice_aqc_port_ets_elem *ets_config;
 	struct virtchnl_version_info virtchnl_version;
 	struct virtchnl_vf_resource *vf_res; /* VF resource */
 	struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */
@@ -83,5 +129,7 @@ int ice_dcf_query_stats(struct ice_dcf_hw *hw,
 int ice_dcf_add_del_all_mac_addr(struct ice_dcf_hw *hw, bool add);
 int ice_dcf_link_update(struct rte_eth_dev *dev,
 		    __rte_unused int wait_to_complete);
+void ice_dcf_tm_conf_init(struct rte_eth_dev *dev);
+void ice_dcf_tm_conf_uninit(struct rte_eth_dev *dev);
 
 #endif /* _ICE_DCF_H_ */
diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 0b40ebbec6..69fe6e63d1 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -622,6 +622,7 @@ ice_dcf_dev_stop(struct rte_eth_dev *dev)
 	ice_dcf_add_del_all_mac_addr(&dcf_ad->real_hw, false);
 	dev->data->dev_link.link_status = ETH_LINK_DOWN;
 	ad->pf.adapter_stopped = 1;
+	dcf_ad->real_hw.tm_conf.committed = false;
 
 	return 0;
 }
@@ -994,6 +995,18 @@ ice_dcf_dev_udp_tunnel_port_del(struct rte_eth_dev *dev,
 	return ret;
 }
 
+static int
+ice_dcf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+		void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &ice_dcf_tm_ops;
+
+	return 0;
+}
+
 static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.dev_start               = ice_dcf_dev_start,
 	.dev_stop                = ice_dcf_dev_stop,
@@ -1018,6 +1031,7 @@ static const struct eth_dev_ops ice_dcf_eth_dev_ops = {
 	.flow_ops_get            = ice_dcf_dev_flow_ops_get,
 	.udp_tunnel_port_add	 = ice_dcf_dev_udp_tunnel_port_add,
 	.udp_tunnel_port_del	 = ice_dcf_dev_udp_tunnel_port_del,
+	.tm_ops_get              = ice_dcf_tm_ops_get,
 };
 
 static int
diff --git a/drivers/net/ice/ice_dcf_ethdev.h b/drivers/net/ice/ice_dcf_ethdev.h
index e7c9d7fe41..8510e37119 100644
--- a/drivers/net/ice/ice_dcf_ethdev.h
+++ b/drivers/net/ice/ice_dcf_ethdev.h
@@ -7,6 +7,8 @@
 
 #include "base/ice_common.h"
 #include "base/ice_adminq_cmd.h"
+#include "base/ice_dcb.h"
+#include "base/ice_sched.h"
 
 #include "ice_ethdev.h"
 #include "ice_dcf.h"
@@ -52,6 +54,7 @@ struct ice_dcf_vf_repr {
 	struct ice_dcf_vlan outer_vlan_info; /* DCF always handle outer VLAN */
 };
 
+extern const struct rte_tm_ops ice_dcf_tm_ops;
 void ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 				 uint8_t *msg, uint16_t msglen);
 int ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev);
diff --git a/drivers/net/ice/ice_dcf_parent.c b/drivers/net/ice/ice_dcf_parent.c
index 788f6dd2a0..c59cd0bef9 100644
--- a/drivers/net/ice/ice_dcf_parent.c
+++ b/drivers/net/ice/ice_dcf_parent.c
@@ -264,6 +264,29 @@ ice_dcf_handle_pf_event_msg(struct ice_dcf_hw *dcf_hw,
 	}
 }
 
+static int
+ice_dcf_query_port_ets(struct ice_hw *parent_hw, struct ice_dcf_hw *real_hw)
+{
+	int ret;
+
+	real_hw->ets_config = (struct ice_aqc_port_ets_elem *)
+			ice_malloc(real_hw, sizeof(*real_hw->ets_config));
+	if (!real_hw->ets_config)
+		return ICE_ERR_NO_MEMORY;
+
+	ret = ice_aq_query_port_ets(parent_hw->port_info,
+			real_hw->ets_config, sizeof(*real_hw->ets_config),
+			NULL);
+	if (ret) {
+		PMD_DRV_LOG(ERR, "DCF Query Port ETS failed");
+		rte_free(real_hw->ets_config);
+		real_hw->ets_config = NULL;
+		return ret;
+	}
+
+	return ICE_SUCCESS;
+}
+
 static int
 ice_dcf_init_parent_hw(struct ice_hw *hw)
 {
@@ -486,6 +509,15 @@ ice_dcf_init_parent_adapter(struct rte_eth_dev *eth_dev)
 		return err;
 	}
 
+	if (hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS) {
+		err = ice_dcf_query_port_ets(parent_hw, hw);
+		if (err) {
+			PMD_INIT_LOG(ERR, "failed to query port ets with error %d",
+				     err);
+			goto uninit_hw;
+		}
+	}
+
 	err = ice_dcf_load_pkg(parent_hw);
 	if (err) {
 		PMD_INIT_LOG(ERR, "failed to load package with error %d",
diff --git a/drivers/net/ice/ice_dcf_sched.c b/drivers/net/ice/ice_dcf_sched.c
new file mode 100644
index 0000000000..4371bbc820
--- /dev/null
+++ b/drivers/net/ice/ice_dcf_sched.c
@@ -0,0 +1,765 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "base/ice_sched.h"
+#include "ice_dcf_ethdev.h"
+
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error);
+static int ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+				   uint32_t shaper_profile_id,
+				   struct rte_tm_error *error);
+
+const struct rte_tm_ops ice_dcf_tm_ops = {
+	.shaper_profile_add = ice_dcf_shaper_profile_add,
+	.shaper_profile_delete = ice_dcf_shaper_profile_del,
+	.hierarchy_commit = ice_dcf_hierarchy_commit,
+	.node_add = ice_dcf_node_add,
+	.node_delete = ice_dcf_node_delete,
+};
+
+void
+ice_dcf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+
+	/* initialize shaper profile list */
+	TAILQ_INIT(&hw->tm_conf.shaper_profile_list);
+
+	/* initialize node configuration */
+	hw->tm_conf.root = NULL;
+	TAILQ_INIT(&hw->tm_conf.tc_list);
+	TAILQ_INIT(&hw->tm_conf.vsi_list);
+	hw->tm_conf.nb_tc_node = 0;
+	hw->tm_conf.nb_vsi_node = 0;
+	hw->tm_conf.committed = false;
+}
+
+void
+ice_dcf_tm_conf_uninit(struct rte_eth_dev *dev)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	struct ice_dcf_tm_node *tm_node;
+
+	/* clear node configuration */
+	while ((tm_node = TAILQ_FIRST(&hw->tm_conf.vsi_list))) {
+		TAILQ_REMOVE(&hw->tm_conf.vsi_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	hw->tm_conf.nb_vsi_node = 0;
+	while ((tm_node = TAILQ_FIRST(&hw->tm_conf.tc_list))) {
+		TAILQ_REMOVE(&hw->tm_conf.tc_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	hw->tm_conf.nb_tc_node = 0;
+	if (hw->tm_conf.root) {
+		rte_free(hw->tm_conf.root);
+		hw->tm_conf.root = NULL;
+	}
+
+	/* Remove all shaper profiles */
+	while ((shaper_profile =
+	       TAILQ_FIRST(&hw->tm_conf.shaper_profile_list))) {
+		TAILQ_REMOVE(&hw->tm_conf.shaper_profile_list,
+			     shaper_profile, node);
+		rte_free(shaper_profile);
+	}
+}
+
+static inline struct ice_dcf_tm_node *
+ice_dcf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum ice_dcf_tm_node_type *node_type)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct ice_dcf_tm_node_list *tc_list = &hw->tm_conf.tc_list;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (hw->tm_conf.root && hw->tm_conf.root->id == node_id) {
+		*node_type = ICE_DCF_TM_NODE_TYPE_PORT;
+		return hw->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, vsi_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = ICE_DCF_TM_NODE_TYPE_VSI;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static inline struct ice_dcf_tm_shaper_profile *
+ice_dcf_shaper_profile_search(struct rte_eth_dev *dev,
+			   uint32_t shaper_profile_id)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_shaper_profile_list *shaper_profile_list =
+		&hw->tm_conf.shaper_profile_list;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	TAILQ_FOREACH(shaper_profile, shaper_profile_list, node) {
+		if (shaper_profile_id == shaper_profile->shaper_profile_id)
+			return shaper_profile;
+	}
+
+	return NULL;
+}
+
+static int
+ice_dcf_node_param_check(struct ice_dcf_hw *hw, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* not support shared shaper */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= 8 * hw->num_vfs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type parent_node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_tm_shaper_profile *shaper_profile = NULL;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *parent_node;
+	struct ice_dcf_tm_node *tm_node;
+	uint16_t tc_nb = 1;
+	int i, ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = ice_dcf_node_param_check(hw, node_id, priority, weight,
+				   params, error);
+	if (ret)
+		return ret;
+
+	for (i = 1; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		if (hw->ets_config->tc_valid_bits & (1 << i))
+			tc_nb++;
+	}
+
+	/* check if the node already exists */
+	if (ice_dcf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* check the shaper profile id */
+	if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE) {
+		shaper_profile = ice_dcf_shaper_profile_search(dev,
+			params->shaper_profile_id);
+		if (!shaper_profile) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+			error->message = "shaper profile not exist";
+			return -EINVAL;
+		}
+	}
+
+	/* add the root node if it does not have a parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != ICE_DCF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (hw->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("ice_dcf_tm_node",
+				      sizeof(struct ice_dcf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		hw->tm_conf.root = tm_node;
+
+		return 0;
+	}
+
+	/* TC or vsi node */
+	/* check the parent node */
+	parent_node = ice_dcf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != ICE_DCF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != ICE_DCF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not port or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != (uint32_t)(parent_node_type + 1)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the TC node number */
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (hw->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the vsi node number */
+		if (parent_node->reference_count >= hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many VSI for one TC";
+			return -EINVAL;
+		}
+		/* check the vsi node id */
+		if (node_id > tc_nb * hw->num_vfs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large VSI id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or vsi node */
+	tm_node = rte_zmalloc("ice_dcf_tm_node",
+			      sizeof(struct ice_dcf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->priority = priority;
+	tm_node->weight = weight;
+	tm_node->shaper_profile = shaper_profile;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = hw->tm_conf.nb_tc_node;
+		hw->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&hw->tm_conf.vsi_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		hw->tm_conf.nb_vsi_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	/* increase the reference counter of the shaper profile */
+	if (shaper_profile)
+		shaper_profile->reference_count++;
+
+	return 0;
+}
+
+static int
+ice_dcf_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	enum ice_dcf_tm_node_type node_type = ICE_DCF_TM_NODE_TYPE_MAX;
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (hw->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = ice_dcf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == ICE_DCF_TM_NODE_TYPE_PORT) {
+		if (tm_node->shaper_profile)
+			tm_node->shaper_profile->reference_count--;
+		rte_free(tm_node);
+		hw->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or VSI node */
+	if (tm_node->shaper_profile)
+		tm_node->shaper_profile->reference_count--;
+	tm_node->parent->reference_count--;
+	if (node_type == ICE_DCF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&hw->tm_conf.tc_list, tm_node, node);
+		hw->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&hw->tm_conf.vsi_list, tm_node, node);
+		hw->tm_conf.nb_vsi_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_param_check(struct rte_tm_shaper_params *profile,
+				struct rte_tm_error *error)
+{
+	/* min bucket size not supported */
+	if (profile->committed.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE;
+		error->message = "committed bucket size not supported";
+		return -EINVAL;
+	}
+	/* max bucket size not supported */
+	if (profile->peak.size) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE;
+		error->message = "peak bucket size not supported";
+		return -EINVAL;
+	}
+	/* length adjustment not supported */
+	if (profile->pkt_length_adjust) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN;
+		error->message = "packet length adjustment not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_add(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_shaper_params *profile,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+	int ret;
+
+	if (!profile || !error)
+		return -EINVAL;
+
+	ret = ice_dcf_shaper_profile_param_check(profile, error);
+	if (ret)
+		return ret;
+
+	shaper_profile = ice_dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID exist";
+		return -EINVAL;
+	}
+
+	shaper_profile = rte_zmalloc("ice_dcf_tm_shaper_profile",
+				     sizeof(struct ice_dcf_tm_shaper_profile),
+				     0);
+	if (!shaper_profile)
+		return -ENOMEM;
+	shaper_profile->shaper_profile_id = shaper_profile_id;
+	rte_memcpy(&shaper_profile->profile, profile,
+			 sizeof(struct rte_tm_shaper_params));
+	TAILQ_INSERT_TAIL(&hw->tm_conf.shaper_profile_list,
+			  shaper_profile, node);
+
+	return 0;
+}
+
+static int
+ice_dcf_shaper_profile_del(struct rte_eth_dev *dev,
+			uint32_t shaper_profile_id,
+			struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct ice_dcf_tm_shaper_profile *shaper_profile;
+
+	if (!error)
+		return -EINVAL;
+
+	shaper_profile = ice_dcf_shaper_profile_search(dev, shaper_profile_id);
+
+	if (!shaper_profile) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID;
+		error->message = "profile ID not exist";
+		return -EINVAL;
+	}
+
+	/* don't delete a profile if it's used by one or several nodes */
+	if (shaper_profile->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_SHAPER_PROFILE;
+		error->message = "profile in use";
+		return -EINVAL;
+	}
+
+	TAILQ_REMOVE(&hw->tm_conf.shaper_profile_list, shaper_profile, node);
+	rte_free(shaper_profile);
+
+	return 0;
+}
+
+static int
+ice_dcf_set_vf_bw(struct ice_dcf_hw *hw,
+			struct virtchnl_dcf_bw_cfg_list *vf_bw,
+			uint16_t len)
+{
+	struct dcf_virtchnl_cmd args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.v_op = VIRTCHNL_OP_DCF_CONFIG_BW;
+	args.req_msg = (uint8_t *)vf_bw;
+	args.req_msglen  = len;
+	err = ice_dcf_execute_virtchnl_cmd(hw, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "fail to execute command %s",
+			    "VIRTCHNL_OP_DCF_CONFIG_BW");
+	return err;
+}
+
+static int
+ice_dcf_validate_tc_bw(struct virtchnl_dcf_bw_cfg_list *tc_bw,
+			uint32_t port_bw)
+{
+	struct virtchnl_dcf_bw_cfg *cfg;
+	bool lowest_cir_mark = false;
+	u32 total_peak, rest_peak;
+	u32 committed, peak;
+	int i;
+
+	total_peak = 0;
+	for (i = 0; i < tc_bw->num_elem; i++)
+		total_peak += tc_bw->cfg[i].shaper.peak;
+
+	for (i = 0; i < tc_bw->num_elem; i++) {
+		cfg = &tc_bw->cfg[i];
+		peak = cfg->shaper.peak;
+		committed = cfg->shaper.committed;
+		rest_peak = total_peak - peak;
+
+		if (lowest_cir_mark && peak == 0) {
+			PMD_DRV_LOG(ERR, "Max bandwidth must be configured for TC%u",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (!lowest_cir_mark && committed)
+			lowest_cir_mark = true;
+
+		if (committed && committed + rest_peak > port_bw) {
+			PMD_DRV_LOG(ERR, "Total value of TC%u min bandwidth and other TCs' max bandwidth %ukbps should be less than port link speed %ukbps",
+				cfg->tc_num, committed + rest_peak, port_bw);
+			return -EINVAL;
+		}
+
+		if (committed && committed < ICE_SCHED_MIN_BW) {
+			PMD_DRV_LOG(ERR, "If TC%u min Tx bandwidth is set, it cannot be less than 500Kbps",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (peak && committed > peak) {
+			PMD_DRV_LOG(ERR, "TC%u Min Tx bandwidth cannot be greater than max Tx bandwidth",
+				cfg->tc_num);
+			return -EINVAL;
+		}
+
+		if (peak > port_bw) {
+			PMD_DRV_LOG(ERR, "TC%u max Tx bandwidth %uKbps is greater than current link speed %uKbps",
+				cfg->tc_num, peak, port_bw);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+static int ice_dcf_hierarchy_commit(struct rte_eth_dev *dev,
+				 int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct ice_dcf_adapter *adapter = dev->data->dev_private;
+	struct ice_dcf_hw *hw = &adapter->real_hw;
+	struct virtchnl_dcf_bw_cfg_list *vf_bw;
+	struct virtchnl_dcf_bw_cfg_list *tc_bw;
+	struct ice_dcf_tm_node_list *vsi_list = &hw->tm_conf.vsi_list;
+	struct rte_tm_shaper_params *profile;
+	struct ice_dcf_tm_node *tm_node;
+	uint32_t port_bw, cir_total;
+	uint16_t size, vf_id;
+	uint8_t num_elem = 0;
+	int i, ret_val = ICE_SUCCESS;
+
+	if (!(hw->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)) {
+		PMD_DRV_LOG(ERR, "Configure VF bandwidth is not supported");
+		ret_val = ICE_ERR_NOT_SUPPORTED;
+		goto fail_clear;
+	}
+
+	/* check if all TC nodes are set */
+	if (BIT(hw->tm_conf.nb_tc_node) & hw->ets_config->tc_valid_bits) {
+		PMD_DRV_LOG(ERR, "Not all enabled TC nodes are set");
+		ret_val = ICE_ERR_PARAM;
+		goto fail_clear;
+	}
+
+	size = sizeof(struct virtchnl_dcf_bw_cfg_list) +
+		sizeof(struct virtchnl_dcf_bw_cfg) *
+		(hw->tm_conf.nb_tc_node - 1);
+	vf_bw = rte_zmalloc("vf_bw", size, 0);
+	if (!vf_bw) {
+		ret_val = ICE_ERR_NO_MEMORY;
+		goto fail_clear;
+	}
+	tc_bw = rte_zmalloc("tc_bw", size, 0);
+	if (!tc_bw) {
+		ret_val = ICE_ERR_NO_MEMORY;
+		goto fail_clear;
+	}
+
+	/* port bandwidth (Kbps) */
+	port_bw = hw->link_speed * 1000;
+	cir_total = 0;
+
+	/* init tc bw configuration */
+#define ICE_DCF_SCHED_TC_NODE 0xffff
+	tc_bw->vf_id = ICE_DCF_SCHED_TC_NODE;
+	tc_bw->node_type = VIRTCHNL_DCF_TARGET_TC_BW;
+	tc_bw->num_elem = hw->tm_conf.nb_tc_node;
+	for (i = 0; i < tc_bw->num_elem; i++) {
+		tc_bw->cfg[i].tc_num = i;
+		tc_bw->cfg[i].type = VIRTCHNL_BW_SHAPER;
+		tc_bw->cfg[i].bw_type |=
+			VIRTCHNL_DCF_BW_PIR | VIRTCHNL_DCF_BW_CIR;
+	}
+
+	for (vf_id = 0; vf_id < hw->num_vfs; vf_id++) {
+		num_elem = 0;
+		vf_bw->vf_id = vf_id;
+		vf_bw->node_type = VIRTCHNL_DCF_TARGET_VF_BW;
+		TAILQ_FOREACH(tm_node, vsi_list, node) {
+			/* scan the nodes belonging to one VSI */
+			if (tm_node->id - hw->num_vfs * tm_node->tc != vf_id)
+				continue;
+			vf_bw->cfg[num_elem].tc_num = tm_node->tc;
+			vf_bw->cfg[num_elem].type = VIRTCHNL_BW_SHAPER;
+			if (tm_node->shaper_profile) {
+				/* Transfer from Byte per seconds to Kbps */
+				profile = &tm_node->shaper_profile->profile;
+				vf_bw->cfg[num_elem].shaper.peak =
+				profile->peak.rate / 1000 * BITS_PER_BYTE;
+				vf_bw->cfg[num_elem].shaper.committed =
+				profile->committed.rate / 1000 * BITS_PER_BYTE;
+				vf_bw->cfg[num_elem].bw_type |=
+					VIRTCHNL_DCF_BW_PIR |
+					VIRTCHNL_DCF_BW_CIR;
+			}
+
+			/* update tc node bw configuration */
+			tc_bw->cfg[tm_node->tc].shaper.peak +=
+				vf_bw->cfg[num_elem].shaper.peak;
+			tc_bw->cfg[tm_node->tc].shaper.committed +=
+				vf_bw->cfg[num_elem].shaper.committed;
+
+			cir_total += vf_bw->cfg[num_elem].shaper.committed;
+			num_elem++;
+		}
+
+		/* check if all TC nodes are set with VF vsi nodes */
+		if (num_elem != hw->tm_conf.nb_tc_node) {
+			PMD_DRV_LOG(ERR, "VF%u vsi nodes are not set to all TC nodes, node id should be continuous",
+				    vf_id);
+			ret_val = ICE_ERR_PARAM;
+			goto fail_clear;
+		}
+
+		vf_bw->num_elem = num_elem;
+		ret_val = ice_dcf_set_vf_bw(hw, vf_bw, size);
+		if (ret_val)
+			goto fail_clear;
+		memset(vf_bw, 0, size);
+	}
+
+	/* check if total CIR is larger than port bandwidth */
+	if (cir_total > port_bw) {
+		PMD_DRV_LOG(ERR, "Total CIR of all VFs is larger than port bandwidth");
+		ret_val = ICE_ERR_PARAM;
+		goto fail_clear;
+	}
+
+	/* check and commit tc node bw configuration */
+	ret_val = ice_dcf_validate_tc_bw(tc_bw, port_bw);
+	if (ret_val)
+		goto fail_clear;
+	ret_val = ice_dcf_set_vf_bw(hw, tc_bw, size);
+	if (ret_val)
+		goto fail_clear;
+
+	hw->tm_conf.committed = true;
+	return ret_val;
+
+fail_clear:
+	/* clear all the traffic manager configuration */
+	if (clear_on_fail) {
+		ice_dcf_tm_conf_uninit(dev);
+		ice_dcf_tm_conf_init(dev);
+	}
+	return ret_val;
+}
diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build
index 65750d3501..0b86d74a49 100644
--- a/drivers/net/ice/meson.build
+++ b/drivers/net/ice/meson.build
@@ -70,6 +70,7 @@ endif
 sources += files('ice_dcf.c',
          'ice_dcf_vf_representor.c',
          'ice_dcf_ethdev.c',
-         'ice_dcf_parent.c')
+         'ice_dcf_parent.c',
+	 'ice_dcf_sched.c')
 
 headers = files('rte_pmd_ice.h')
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 5/7] net/iavf: query QoS cap and set queue TC mapping
  2021-07-01 11:41 ` Ting Xu
                     ` (3 preceding siblings ...)
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 4/7] net/ice: support QoS config VF bandwidth in DCF Ting Xu
@ 2021-07-01 11:41   ` Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 6/7] net/iavf: check Tx packet with correct UP and queue Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 7/7] doc: release note for ETS-based Tx QoS Ting Xu
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 11:41 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

This patch adds support for a VF to configure the ETS-based Tx QoS,
including querying the current QoS configuration from the PF and
configuring the queue TC mapping. The PF QoS is configured in advance,
and the queried info is provided to the user for future usage. VF
queues are mapped to different TCs in the PF through virtchnl.
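
From the VF, the same rte_tm API expresses the queue-to-TC mapping (an
editorial sketch, not part of this patch; queue nodes use the queue id
as node id, while the root/TC ids here are arbitrary values above the
queue range):

	struct rte_tm_error err;
	struct rte_tm_node_params np = { 0 };

	/* non-leaf nodes: VF root (level 0), TC0/TC1 (level 1) */
	np.nonleaf.n_sp_priorities = 1;
	rte_tm_node_add(port_id, 1000, RTE_TM_NODE_ID_NULL,
			0, 1, 0, &np, &err);
	rte_tm_node_add(port_id, 900, 1000, 0, 1, 1, &np, &err);
	rte_tm_node_add(port_id, 901, 1000, 0, 1, 1, &np, &err);

	/* leaf queue nodes, contiguous queue ids per TC */
	memset(&np, 0, sizeof(np));
	np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
	rte_tm_node_add(port_id, 0, 900, 0, 1, 2, &np, &err);
	rte_tm_node_add(port_id, 1, 900, 0, 1, 2, &np, &err);
	rte_tm_node_add(port_id, 2, 901, 0, 1, 2, &np, &err);
	rte_tm_node_add(port_id, 3, 901, 0, 1, 2, &np, &err);

	/* translates into VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP to the PF */
	rte_tm_hierarchy_commit(port_id, 1, &err);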

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/iavf/iavf.h        |  46 +++
 drivers/net/iavf/iavf_ethdev.c |  37 ++
 drivers/net/iavf/iavf_tm.c     | 730 +++++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c  |  56 ++-
 drivers/net/iavf/meson.build   |   1 +
 5 files changed, 869 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 4f5811ae87..feb8337b55 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -6,6 +6,8 @@
 #define _IAVF_ETHDEV_H_
 
 #include <rte_kvargs.h>
+#include <rte_tm_driver.h>
+
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
 #include <iavf_type.h>
@@ -82,6 +84,8 @@
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
 
+#define IAVF_BITS_PER_BYTE 8
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -129,6 +133,38 @@ enum iavf_aq_result {
 	IAVF_MSG_CMD,      /* Read async command result */
 };
 
+/* Struct to store Traffic Manager node configuration. */
+struct iavf_tm_node {
+	TAILQ_ENTRY(iavf_tm_node) node;
+	uint32_t id;
+	uint32_t tc;
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count;
+	struct iavf_tm_node *parent;
+	struct rte_tm_node_params params;
+};
+
+TAILQ_HEAD(iavf_tm_node_list, iavf_tm_node);
+
+/* node type of Traffic Manager */
+enum iavf_tm_node_type {
+	IAVF_TM_NODE_TYPE_PORT,
+	IAVF_TM_NODE_TYPE_TC,
+	IAVF_TM_NODE_TYPE_QUEUE,
+	IAVF_TM_NODE_TYPE_MAX,
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct iavf_tm_conf {
+	struct iavf_tm_node *root; /* root node - vf vsi */
+	struct iavf_tm_node_list tc_list; /* node list for all the TCs */
+	struct iavf_tm_node_list queue_list; /* node list for all the queues */
+	uint32_t nb_tc_node;
+	uint32_t nb_queue_node;
+	bool committed;
+};
+
 /* Structure to store private data specific for VF instance. */
 struct iavf_info {
 	uint16_t num_queue_pairs;
@@ -175,6 +211,9 @@ struct iavf_info {
 	struct iavf_fdir_info fdir; /* flow director info */
 	/* indicate large VF support enabled or not */
 	bool lv_enabled;
+
+	struct virtchnl_qos_cap_list *qos_cap;
+	struct iavf_tm_conf tm_conf;
 };
 
 #define IAVF_MAX_PKT_TYPE 1024
@@ -344,4 +383,11 @@ int iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			uint32_t mc_addrs_num, bool add);
 int iavf_request_queues(struct iavf_adapter *adapter, uint16_t num);
 int iavf_get_max_rss_queue_region(struct iavf_adapter *adapter);
+int iavf_get_qos_cap(struct iavf_adapter *adapter);
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+			struct virtchnl_queue_tc_mapping *q_tc_mapping,
+			uint16_t size);
+void iavf_tm_conf_init(struct rte_eth_dev *dev);
+void iavf_tm_conf_uninit(struct rte_eth_dev *dev);
+extern const struct rte_tm_ops iavf_tm_ops;
 #endif /* _IAVF_ETHDEV_H_ */
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index 5290588b17..7272f69a06 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -122,6 +122,7 @@ static int iavf_dev_flow_ops_get(struct rte_eth_dev *dev,
 static int iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
 			uint32_t mc_addrs_num);
+static int iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg);
 
 static const struct rte_pci_id pci_id_iavf_map[] = {
 	{ RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_ADAPTIVE_VF) },
@@ -200,8 +201,21 @@ static const struct eth_dev_ops iavf_eth_dev_ops = {
 	.flow_ops_get               = iavf_dev_flow_ops_get,
 	.tx_done_cleanup	    = iavf_dev_tx_done_cleanup,
 	.get_monitor_addr           = iavf_get_monitor_addr,
+	.tm_ops_get                 = iavf_tm_ops_get,
 };
 
+static int
+iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+			void *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	*(const void **)arg = &iavf_tm_ops;
+
+	return 0;
+}
+
 static int
 iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
@@ -806,6 +820,12 @@ iavf_dev_start(struct rte_eth_dev *dev)
 				      dev->data->nb_tx_queues);
 	num_queue_pairs = vf->num_queue_pairs;
 
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+		if (iavf_get_qos_cap(adapter)) {
+			PMD_INIT_LOG(ERR, "Failed to get qos capability");
+			return -1;
+		}
+
 	if (iavf_init_queues(dev) != 0) {
 		PMD_DRV_LOG(ERR, "failed to do Queue init");
 		return -1;
@@ -2090,6 +2110,7 @@ iavf_init_vf(struct rte_eth_dev *dev)
 		PMD_INIT_LOG(ERR, "unable to allocate vf_res memory");
 		goto err_api;
 	}
+
 	if (iavf_get_vf_resource(adapter) != 0) {
 		PMD_INIT_LOG(ERR, "iavf_get_vf_config failed");
 		goto err_alloc;
@@ -2124,6 +2145,18 @@ iavf_init_vf(struct rte_eth_dev *dev)
 		}
 	}
 
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS) {
+		bufsz = sizeof(struct virtchnl_qos_cap_list) +
+			IAVF_MAX_TRAFFIC_CLASS *
+			sizeof(struct virtchnl_qos_cap_elem);
+		vf->qos_cap = rte_zmalloc("qos_cap", bufsz, 0);
+		if (!vf->qos_cap) {
+			PMD_INIT_LOG(ERR, "unable to allocate qos_cap memory");
+			goto err_rss;
+		}
+		iavf_tm_conf_init(dev);
+	}
+
 	iavf_init_proto_xtr(dev);
 
 	return 0;
@@ -2131,6 +2164,7 @@ iavf_init_vf(struct rte_eth_dev *dev)
 	rte_free(vf->rss_key);
 	rte_free(vf->rss_lut);
 err_alloc:
+	rte_free(vf->qos_cap);
 	rte_free(vf->vf_res);
 	vf->vsi_res = NULL;
 err_api:
@@ -2338,6 +2372,9 @@ iavf_dev_close(struct rte_eth_dev *dev)
 				     iavf_dev_interrupt_handler, dev);
 	iavf_disable_irq0(hw);
 
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)
+		iavf_tm_conf_uninit(dev);
+
 	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
 		if (vf->rss_lut) {
 			rte_free(vf->rss_lut);
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
new file mode 100644
index 0000000000..03349619d1
--- /dev/null
+++ b/drivers/net/iavf/iavf_tm.c
@@ -0,0 +1,730 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "iavf.h"
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error);
+static int iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error);
+static int iavf_node_capabilities_get(struct rte_eth_dev *dev,
+				      uint32_t node_id,
+				      struct rte_tm_node_capabilities *cap,
+				      struct rte_tm_error *error);
+static int iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error);
+
+const struct rte_tm_ops iavf_tm_ops = {
+	.node_add = iavf_tm_node_add,
+	.node_delete = iavf_tm_node_delete,
+	.capabilities_get = iavf_tm_capabilities_get,
+	.level_capabilities_get = iavf_level_capabilities_get,
+	.node_capabilities_get = iavf_node_capabilities_get,
+	.node_type_get = iavf_node_type_get,
+	.hierarchy_commit = iavf_hierarchy_commit,
+};
+
+void
+iavf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	/* initialize node configuration */
+	vf->tm_conf.root = NULL;
+	TAILQ_INIT(&vf->tm_conf.tc_list);
+	TAILQ_INIT(&vf->tm_conf.queue_list);
+	vf->tm_conf.nb_tc_node = 0;
+	vf->tm_conf.nb_queue_node = 0;
+	vf->tm_conf.committed = false;
+}
+
+void
+iavf_tm_conf_uninit(struct rte_eth_dev *dev)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_node *tm_node;
+
+	/* clear node configuration */
+	while ((tm_node = TAILQ_FIRST(&vf->tm_conf.queue_list))) {
+		TAILQ_REMOVE(&vf->tm_conf.queue_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	vf->tm_conf.nb_queue_node = 0;
+	while ((tm_node = TAILQ_FIRST(&vf->tm_conf.tc_list))) {
+		TAILQ_REMOVE(&vf->tm_conf.tc_list, tm_node, node);
+		rte_free(tm_node);
+	}
+	vf->tm_conf.nb_tc_node = 0;
+	if (vf->tm_conf.root) {
+		rte_free(vf->tm_conf.root);
+		vf->tm_conf.root = NULL;
+	}
+}
+
+static inline struct iavf_tm_node *
+iavf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum iavf_tm_node_type *node_type)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_node_list *tc_list = &vf->tm_conf.tc_list;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+
+	if (vf->tm_conf.root && vf->tm_conf.root->id == node_id) {
+		*node_type = IAVF_TM_NODE_TYPE_PORT;
+		return vf->tm_conf.root;
+	}
+
+	TAILQ_FOREACH(tm_node, tc_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_TC;
+			return tm_node;
+		}
+	}
+
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		if (tm_node->id == node_id) {
+			*node_type = IAVF_TM_NODE_TYPE_QUEUE;
+			return tm_node;
+		}
+	}
+
+	return NULL;
+}
+
+static int
+iavf_node_param_check(struct iavf_info *vf, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* check all the unsupported parameters */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* not support shaper profile */
+	if (params->shaper_profile_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+		error->message = "shaper profile not supported";
+		return -EINVAL;
+	}
+
+	/* not support shared shaper */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= vf->num_queue_pairs) {
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		} else if (params->nonleaf.wfq_weight_mode &&
+			   !(*params->nonleaf.wfq_weight_mode)) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFP should be byte mode";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error)
+{
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!is_leaf || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type == IAVF_TM_NODE_TYPE_QUEUE)
+		*is_leaf = true;
+	else
+		*is_leaf = false;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	enum iavf_tm_node_type parent_node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+	struct iavf_tm_node *parent_node;
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+	int ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = iavf_node_param_check(vf, node_id, priority, weight,
+				    params, error);
+	if (ret)
+		return ret;
+
+	/* check if the node ID is already used */
+	if (iavf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* root node if it does not have a parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level */
+		if (level_id != IAVF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (vf->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("iavf_tm_node",
+				      sizeof(struct iavf_tm_node),
+				      0);
+		if (!tm_node)
+			return -ENOMEM;
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		vf->tm_conf.root = tm_node;
+		return 0;
+	}
+
+	/* TC or queue node */
+	/* check the parent node */
+	parent_node = iavf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != IAVF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not root or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the node number */
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		/* check the TC number */
+		if (vf->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the queue number */
+		if (parent_node->reference_count >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many queues";
+			return -EINVAL;
+		}
+		if (node_id >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large queue id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or queue node */
+	tm_node = rte_zmalloc("iavf_tm_node",
+			      sizeof(struct iavf_tm_node),
+			      0);
+	if (!tm_node)
+		return -ENOMEM;
+	tm_node->id = node_id;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.tc_list,
+				  tm_node, node);
+		tm_node->tc = vf->tm_conf.nb_tc_node;
+		vf->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.queue_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		vf->tm_conf.nb_queue_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	return 0;
+}
+
+static int
+iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+
+	if (!error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* the node should have no child */
+	if (tm_node->reference_count) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (node_type == IAVF_TM_NODE_TYPE_PORT) {
+		rte_free(tm_node);
+		vf->tm_conf.root = NULL;
+		return 0;
+	}
+
+	/* TC or queue node */
+	tm_node->parent->reference_count--;
+	if (node_type == IAVF_TM_NODE_TYPE_TC) {
+		TAILQ_REMOVE(&vf->tm_conf.tc_list, tm_node, node);
+		vf->tm_conf.nb_tc_node--;
+	} else {
+		TAILQ_REMOVE(&vf->tm_conf.queue_list, tm_node, node);
+		vf->tm_conf.nb_queue_node--;
+	}
+	rte_free(tm_node);
+
+	return 0;
+}
+
+static int
+iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	uint16_t tc_nb = vf->qos_cap->num_elem;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (tc_nb > vf->vf_res->num_queue_pairs)
+		return -EINVAL;
+
+	error->type = RTE_TM_ERROR_TYPE_NONE;
+
+	/* set all the parameters to 0 first. */
+	memset(cap, 0, sizeof(struct rte_tm_capabilities));
+
+	/**
+	 * support port + TCs + queues
+	 * this shows the maximum capability, not the current configuration.
+	 */
+	cap->n_nodes_max = 1 + IAVF_MAX_TRAFFIC_CLASS
+		+ vf->num_queue_pairs;
+	cap->n_levels_max = 3; /* port, TC, queue */
+	cap->non_leaf_nodes_identical = 1;
+	cap->leaf_nodes_identical = 1;
+	cap->shaper_n_max = cap->n_nodes_max;
+	cap->shaper_private_n_max = cap->n_nodes_max;
+	cap->shaper_private_dual_rate_n_max = 0;
+	cap->shaper_private_rate_min = 0;
+	/* GBps */
+	cap->shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->shaper_private_packet_mode_supported = 0;
+	cap->shaper_private_byte_mode_supported = 1;
+	cap->shaper_shared_n_max = 0;
+	cap->shaper_shared_n_nodes_per_shaper_max = 0;
+	cap->shaper_shared_n_shapers_per_node_max = 0;
+	cap->shaper_shared_dual_rate_n_max = 0;
+	cap->shaper_shared_rate_min = 0;
+	cap->shaper_shared_rate_max = 0;
+	cap->shaper_shared_packet_mode_supported = 0;
+	cap->shaper_shared_byte_mode_supported = 0;
+	cap->sched_n_children_max = vf->num_queue_pairs;
+	cap->sched_sp_n_priorities_max = 1;
+	cap->sched_wfq_n_children_per_group_max = 0;
+	cap->sched_wfq_n_groups_max = 0;
+	cap->sched_wfq_weight_max = 1;
+	cap->sched_wfq_packet_mode_supported = 0;
+	cap->sched_wfq_byte_mode_supported = 0;
+	cap->cman_head_drop_supported = 0;
+	cap->dynamic_update_mask = 0;
+	cap->shaper_pkt_length_adjust_min = RTE_TM_ETH_FRAMING_OVERHEAD;
+	cap->shaper_pkt_length_adjust_max = RTE_TM_ETH_FRAMING_OVERHEAD_FCS;
+	cap->cman_wred_context_n_max = 0;
+	cap->cman_wred_context_private_n_max = 0;
+	cap->cman_wred_context_shared_n_max = 0;
+	cap->cman_wred_context_shared_n_nodes_per_context_max = 0;
+	cap->cman_wred_context_shared_n_contexts_per_node_max = 0;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (level_id >= IAVF_TM_NODE_TYPE_MAX) {
+		error->type = RTE_TM_ERROR_TYPE_LEVEL_ID;
+		error->message = "too deep level";
+		return -EINVAL;
+	}
+
+	/* root node */
+	if (level_id == IAVF_TM_NODE_TYPE_PORT) {
+		cap->n_nodes_max = 1;
+		cap->n_nodes_nonleaf_max = 1;
+		cap->n_nodes_leaf_max = 0;
+	} else if (level_id == IAVF_TM_NODE_TYPE_TC) {
+		/* TC */
+		cap->n_nodes_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_nonleaf_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_leaf_max = 0;
+	} else {
+		/* queue */
+		cap->n_nodes_max = vf->num_queue_pairs;
+		cap->n_nodes_nonleaf_max = 0;
+		cap->n_nodes_leaf_max = vf->num_queue_pairs;
+	}
+
+	cap->non_leaf_nodes_identical = true;
+	cap->leaf_nodes_identical = true;
+
+	if (level_id != IAVF_TM_NODE_TYPE_QUEUE) {
+		cap->nonleaf.shaper_private_supported = true;
+		cap->nonleaf.shaper_private_dual_rate_supported = false;
+		cap->nonleaf.shaper_private_rate_min = 0;
+		/* GBps */
+		cap->nonleaf.shaper_private_rate_max =
+			vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+		cap->nonleaf.shaper_private_packet_mode_supported = 0;
+		cap->nonleaf.shaper_private_byte_mode_supported = 1;
+		cap->nonleaf.shaper_shared_n_max = 0;
+		cap->nonleaf.shaper_shared_packet_mode_supported = 0;
+		cap->nonleaf.shaper_shared_byte_mode_supported = 0;
+		if (level_id == IAVF_TM_NODE_TYPE_PORT)
+			cap->nonleaf.sched_n_children_max =
+				IAVF_MAX_TRAFFIC_CLASS;
+		else
+			cap->nonleaf.sched_n_children_max =
+				vf->num_queue_pairs;
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
+		cap->nonleaf.sched_wfq_n_groups_max = 0;
+		cap->nonleaf.sched_wfq_weight_max = 1;
+		cap->nonleaf.sched_wfq_packet_mode_supported = 0;
+		cap->nonleaf.sched_wfq_byte_mode_supported = 0;
+		cap->nonleaf.stats_mask = 0;
+
+		return 0;
+	}
+
+	/* queue node */
+	cap->leaf.shaper_private_supported = false;
+	cap->leaf.shaper_private_dual_rate_supported = false;
+	cap->leaf.shaper_private_rate_min = 0;
+	/* GBps */
+	cap->leaf.shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->leaf.shaper_private_packet_mode_supported = 0;
+	cap->leaf.shaper_private_byte_mode_supported = 1;
+	cap->leaf.shaper_shared_n_max = 0;
+	cap->leaf.shaper_shared_packet_mode_supported = 0;
+	cap->leaf.shaper_shared_byte_mode_supported = 0;
+	cap->leaf.cman_head_drop_supported = false;
+	cap->leaf.cman_wred_context_private_supported = true;
+	cap->leaf.cman_wred_context_shared_n_max = 0;
+	cap->leaf.stats_mask = 0;
+
+	return 0;
+}
+
+static int
+iavf_node_capabilities_get(struct rte_eth_dev *dev,
+			   uint32_t node_id,
+			   struct rte_tm_node_capabilities *cap,
+			   struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type;
+	struct virtchnl_qos_cap_elem tc_cap;
+	struct iavf_tm_node *tm_node;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "not support capability get";
+		return -EINVAL;
+	}
+
+	tc_cap = vf->qos_cap->cap[tm_node->tc];
+	if (tc_cap.tc_num != tm_node->tc) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "tc not match";
+		return -EINVAL;
+	}
+
+	cap->shaper_private_supported = true;
+	cap->shaper_private_dual_rate_supported = false;
+	cap->shaper_private_rate_min = tc_cap.shaper.committed;
+	cap->shaper_private_rate_max = tc_cap.shaper.peak;
+	cap->shaper_shared_n_max = 0;
+	cap->nonleaf.sched_n_children_max = vf->num_queue_pairs;
+	cap->nonleaf.sched_sp_n_priorities_max = 1;
+	cap->nonleaf.sched_wfq_n_children_per_group_max = 1;
+	cap->nonleaf.sched_wfq_n_groups_max = 0;
+	cap->nonleaf.sched_wfq_weight_max = tc_cap.weight;
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_adapter *adapter =
+		IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct virtchnl_queue_tc_mapping *q_tc_mapping;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+	uint16_t size;
+	int index = 0, node_committed = 0;
+	int i, ret_val = IAVF_SUCCESS;
+
+	/* check if port is stopped */
+	if (adapter->stopped != 1) {
+		PMD_DRV_LOG(ERR, "Please stop port first");
+		ret_val = IAVF_ERR_NOT_READY;
+		goto err;
+	}
+
+	if (!(vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS)) {
+		PMD_DRV_LOG(ERR, "VF queue tc mapping is not supported");
+		ret_val = IAVF_NOT_SUPPORTED;
+		goto fail_clear;
+	}
+
+	/* check that a TC node has been added for every enabled TC */
+	if (vf->tm_conf.nb_tc_node != vf->qos_cap->num_elem) {
+		PMD_DRV_LOG(ERR, "Not all enabled TCs have a TC node");
+		ret_val = IAVF_ERR_PARAM;
+		goto fail_clear;
+	}
+
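+	/* sizeof(*q_tc_mapping) already counts one tc[] element */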
+	size = sizeof(*q_tc_mapping) + sizeof(q_tc_mapping->tc[0]) *
+		(vf->qos_cap->num_elem - 1);
+	q_tc_mapping = rte_zmalloc("q_tc", size, 0);
+	if (!q_tc_mapping) {
+		ret_val = IAVF_ERR_NO_MEMORY;
+		goto fail_clear;
+	}
+
+	q_tc_mapping->vsi_id = vf->vsi.vsi_id;
+	q_tc_mapping->num_tc = vf->qos_cap->num_elem;
+	q_tc_mapping->num_queue_pairs = vf->num_queue_pairs;
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		if (tm_node->tc >= q_tc_mapping->num_tc) {
+			PMD_DRV_LOG(ERR, "TC%d is not enabled", tm_node->tc);
+			ret_val = IAVF_ERR_PARAM;
+			goto fail_clear;
+		}
+		q_tc_mapping->tc[tm_node->tc].req.queue_count++;
+		node_committed++;
+	}
+
+	/* All queues allocated to this VF should be mapped */
+	if (node_committed < vf->num_queue_pairs) {
+		PMD_DRV_LOG(ERR, "queue node is less than allocated queue pairs");
+		ret_val = IAVF_ERR_PARAM;
+		goto fail_clear;
+	}
+
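+	/* queues are assigned to TCs contiguously, in TC node order */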
+	for (i = 0; i < q_tc_mapping->num_tc; i++) {
+		q_tc_mapping->tc[i].req.start_queue_id = index;
+		index += q_tc_mapping->tc[i].req.queue_count;
+	}
+
+	ret_val = iavf_set_q_tc_map(dev, q_tc_mapping, size);
+	if (ret_val)
+		goto fail_clear;
+
+	vf->tm_conf.committed = true;
+	return ret_val;
+
+fail_clear:
+	/* clear all the traffic manager configuration */
+	if (clear_on_fail) {
+		iavf_tm_conf_uninit(dev);
+		iavf_tm_conf_init(dev);
+	}
+err:
+	return ret_val;
+}
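
The callbacks above plug into the generic rte_tm API. As a reference, here
is a minimal usage sketch (not part of the patch) of how an application
could build and commit the port/TC/queue hierarchy. The node IDs, the
single-TC layout and the helper name are illustrative assumptions; the
commit callback additionally requires one TC node per TC reported in the
QoS capabilities, and a stopped port.

#include <string.h>
#include <rte_tm.h>

static int iavf_tm_usage_sketch(uint16_t port_id, uint16_t nb_queues)
{
	struct rte_tm_node_params np;
	struct rte_tm_error err;
	/* non-leaf node IDs must be >= the number of queue pairs */
	uint32_t root = (uint32_t)nb_queues + 1, tc0 = nb_queues;
	uint16_t q;
	int ret;

	/* root (port) node: priority 0, weight 1, level 0 */
	memset(&np, 0, sizeof(np));
	np.nonleaf.n_sp_priorities = 1;
	ret = rte_tm_node_add(port_id, root, RTE_TM_NODE_ID_NULL,
			      0, 1, 0, &np, &err);
	if (ret)
		return ret;

	/* one TC node under the root, level 1 */
	ret = rte_tm_node_add(port_id, tc0, root, 0, 1, 1, &np, &err);
	if (ret)
		return ret;

	/* queue (leaf) nodes use the Tx queue ID as node ID, level 2 */
	memset(&np, 0, sizeof(np));
	np.leaf.wred.wred_profile_id = RTE_TM_WRED_PROFILE_ID_NONE;
	for (q = 0; q < nb_queues; q++) {
		ret = rte_tm_node_add(port_id, q, tc0, 0, 1, 2, &np, &err);
		if (ret)
			return ret;
	}

	/* push the tree to hardware; clear the config on failure */
	return rte_tm_hierarchy_commit(port_id, 1, &err);
}
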
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index 02e828f9b7..06dc663947 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -467,7 +467,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter)
 		VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 		VIRTCHNL_VF_OFFLOAD_CRC |
 		VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-		VIRTCHNL_VF_LARGE_NUM_QPAIRS;
+		VIRTCHNL_VF_LARGE_NUM_QPAIRS |
+		VIRTCHNL_VF_OFFLOAD_QOS;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -1550,6 +1551,59 @@ iavf_set_hena(struct iavf_adapter *adapter, uint64_t hena)
 	return err;
 }
 
+int
+iavf_get_qos_cap(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	uint32_t len;
+	int err;
+
+	args.ops = VIRTCHNL_OP_GET_QOS_CAPS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+	err = iavf_execute_vf_cmd(adapter, &args);
+
+	if (err) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of OP_GET_VF_RESOURCE");
+		return -1;
+	}
+
+	len = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+
+	rte_memcpy(vf->qos_cap, args.out_buffer,
+		   RTE_MIN(args.out_size, len));
+
+	return 0;
+}
+
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+		struct virtchnl_queue_tc_mapping *q_tc_mapping, uint16_t size)
+{
+	struct iavf_adapter *adapter =
+			IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_cmd_info args;
+	int err;
+
+	memset(&args, 0, sizeof(args));
+	args.ops = VIRTCHNL_OP_CONFIG_QUEUE_TC_MAP;
+	args.in_args = (uint8_t *)q_tc_mapping;
+	args.in_args_size = size;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+
+	err = iavf_execute_vf_cmd(adapter, &args);
+	if (err)
+		PMD_DRV_LOG(ERR, "Failed to execute command of"
+			    " VIRTCHNL_OP_CONFIG_TC_MAP");
+	return err;
+}
+
 int
 iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			struct rte_ether_addr *mc_addrs,
diff --git a/drivers/net/iavf/meson.build b/drivers/net/iavf/meson.build
index 6f222a9e87..f2010a8337 100644
--- a/drivers/net/iavf/meson.build
+++ b/drivers/net/iavf/meson.build
@@ -19,6 +19,7 @@ sources = files(
         'iavf_generic_flow.c',
         'iavf_fdir.c',
         'iavf_hash.c',
+        'iavf_tm.c',
 )
 
 if arch_subdir == 'x86'
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 6/7] net/iavf: check Tx packet with correct UP and queue
  2021-07-01 11:41 ` Ting Xu
                     ` (4 preceding siblings ...)
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 5/7] net/iavf: query QoS cap and set queue TC mapping Ting Xu
@ 2021-07-01 11:41   ` Ting Xu
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 7/7] doc: release note for ETS-based Tx QoS Ting Xu
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 11:41 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

Add a check in the Tx packet preparation function to guarantee that a
packet with a specific user priority is distributed to the correct Tx
queue according to the configured Tx queue TC mapping.

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/iavf/iavf.h      | 10 ++++++++
 drivers/net/iavf/iavf_rxtx.c | 49 ++++++++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_rxtx.h |  1 +
 drivers/net/iavf/iavf_tm.c   | 13 ++++++++++
 4 files changed, 73 insertions(+)
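
For reference, the VLAN TCI layout the new check relies on: the PCP (user
priority) field is the top 3 bits of the 16-bit TCI, which is why
IAVF_VLAN_TAG_PCP_OFFSET is 13. A minimal sketch, with an illustrative
helper name that is not part of the patch:

#include <stdint.h>

static inline uint8_t vlan_tci_to_up(uint16_t vlan_tci)
{
	/* TCI = PCP (3 bits) | DEI (1 bit) | VID (12 bits) */
	return vlan_tci >> 13; /* IAVF_VLAN_TAG_PCP_OFFSET */
}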

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index feb8337b55..b3bd078111 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -86,6 +86,8 @@
 
 #define IAVF_BITS_PER_BYTE 8
 
+#define IAVF_VLAN_TAG_PCP_OFFSET 13
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -165,6 +167,13 @@ struct iavf_tm_conf {
 	bool committed;
 };
 
+/* Struct to store queue TC mapping. Queues are contiguous within one TC */
+struct iavf_qtc_map {
+	uint8_t	tc;
+	uint16_t start_queue_id;
+	uint16_t queue_count;
+};
+
 /* Structure to store private data specific for VF instance. */
 struct iavf_info {
 	uint16_t num_queue_pairs;
@@ -213,6 +222,7 @@ struct iavf_info {
 	bool lv_enabled;
 
 	struct virtchnl_qos_cap_list *qos_cap;
+	struct iavf_qtc_map *qtc_map;
 	struct iavf_tm_conf tm_conf;
 };
 
diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c
index 0361af0d85..f817fbc49b 100644
--- a/drivers/net/iavf/iavf_rxtx.c
+++ b/drivers/net/iavf/iavf_rxtx.c
@@ -785,6 +785,22 @@ iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
 		ad->tx_vec_allowed = false;
 	}
 
+	if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS &&
+	    vf->tm_conf.committed) {
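+		/* find the TC whose committed queue range contains this queue */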
+		int tc;
+		for (tc = 0; tc < vf->qos_cap->num_elem; tc++) {
+			if (txq->queue_id >= vf->qtc_map[tc].start_queue_id &&
+			    txq->queue_id < (vf->qtc_map[tc].start_queue_id +
+			    vf->qtc_map[tc].queue_count))
+				break;
+		}
+		if (tc >= vf->qos_cap->num_elem) {
+			PMD_INIT_LOG(ERR, "Queue TC mapping is not correct");
+			return -EINVAL;
+		}
+		txq->tc = tc;
+	}
+
 	return 0;
 }
 
@@ -2342,6 +2358,27 @@ iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	return nb_tx;
 }
 
+/* Check whether a packet with a given VLAN user priority is being
+ * transmitted on the correct queue.
+ */
+static int
+iavf_check_vlan_up2tc(struct iavf_tx_queue *txq, struct rte_mbuf *m)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	uint16_t up;
+
+	up = m->vlan_tci >> IAVF_VLAN_TAG_PCP_OFFSET;
+
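+	/* tc_prio is treated as a bitmap of the UPs mapped to this TC */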
+	if (!(vf->qos_cap->cap[txq->tc].tc_prio & BIT(up))) {
+		PMD_TX_LOG(ERR, "packet with vlan pcp %u cannot transmit in queue %u\n",
+			up, txq->queue_id);
+		return -1;
+	} else {
+		return 0;
+	}
+}
+
 /* TX prep functions */
 uint16_t
 iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
@@ -2350,6 +2387,9 @@ iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 	int i, ret;
 	uint64_t ol_flags;
 	struct rte_mbuf *m;
+	struct iavf_tx_queue *txq = tx_queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
 	for (i = 0; i < nb_pkts; i++) {
 		m = tx_pkts[i];
@@ -2385,6 +2425,15 @@ iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 			rte_errno = -ret;
 			return i;
 		}
+
+		if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QOS &&
+		    ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN)) {
+			ret = iavf_check_vlan_up2tc(txq, m);
+			if (ret != 0) {
+				rte_errno = -ret;
+				return i;
+			}
+		}
 	}
 
 	return i;
diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h
index 19b6028a15..e210b913d6 100644
--- a/drivers/net/iavf/iavf_rxtx.h
+++ b/drivers/net/iavf/iavf_rxtx.h
@@ -252,6 +252,7 @@ struct iavf_tx_queue {
 #define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1	BIT(0)
 #define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2	BIT(1)
 	uint8_t vlan_flag;
+	uint8_t tc;
 };
 
 /* Offload features */
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
index 03349619d1..558021014e 100644
--- a/drivers/net/iavf/iavf_tm.c
+++ b/drivers/net/iavf/iavf_tm.c
@@ -655,6 +655,7 @@ static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 	struct virtchnl_queue_tc_mapping *q_tc_mapping;
 	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
 	struct iavf_tm_node *tm_node;
+	struct iavf_qtc_map *qtc_map;
 	uint16_t size;
 	int index = 0, node_committed = 0;
 	int i, ret_val = IAVF_SUCCESS;
@@ -690,6 +691,7 @@ static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 	q_tc_mapping->vsi_id = vf->vsi.vsi_id;
 	q_tc_mapping->num_tc = vf->qos_cap->num_elem;
 	q_tc_mapping->num_queue_pairs = vf->num_queue_pairs;
+
 	TAILQ_FOREACH(tm_node, queue_list, node) {
 		if (tm_node->tc >= q_tc_mapping->num_tc) {
 			PMD_DRV_LOG(ERR, "TC%d is not enabled", tm_node->tc);
@@ -707,15 +709,26 @@ static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
 		goto fail_clear;
 	}
 
+	/* store the queue TC mapping info */
+	qtc_map = rte_zmalloc("qtc_map",
+		sizeof(struct iavf_qtc_map) * q_tc_mapping->num_tc, 0);
+	if (!qtc_map) {
+		ret_val = IAVF_ERR_NO_MEMORY;
+		goto fail_clear;
+	}
+
 	for (i = 0; i < q_tc_mapping->num_tc; i++) {
 		q_tc_mapping->tc[i].req.start_queue_id = index;
 		index += q_tc_mapping->tc[i].req.queue_count;
+		qtc_map[i].tc = i;
+		qtc_map[i].start_queue_id =
+			q_tc_mapping->tc[i].req.start_queue_id;
+		qtc_map[i].queue_count = q_tc_mapping->tc[i].req.queue_count;
 	}
 
 	ret_val = iavf_set_q_tc_map(dev, q_tc_mapping, size);
 	if (ret_val)
 		goto fail_clear;
 
+	vf->qtc_map = qtc_map;
 	vf->tm_conf.committed = true;
 	return ret_val;
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [dpdk-dev] [PATCH v5 7/7] doc: release note for ETS-based Tx QoS
  2021-07-01 11:41 ` Ting Xu
                     ` (5 preceding siblings ...)
  2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 6/7] net/iavf: check Tx packet with correct UP and queue Ting Xu
@ 2021-07-01 11:41   ` Ting Xu
  6 siblings, 0 replies; 44+ messages in thread
From: Ting Xu @ 2021-07-01 11:41 UTC (permalink / raw)
  To: dev; +Cc: qi.z.zhang, jingjing.wu, beilei.xing, qiming.yang, Ting Xu

Add 21.08 release note for ETS-based Tx QoS

Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 doc/guides/rel_notes/release_21_08.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doc/guides/rel_notes/release_21_08.rst b/doc/guides/rel_notes/release_21_08.rst
index a6ecfdf3ce..35aa76a270 100644
--- a/doc/guides/rel_notes/release_21_08.rst
+++ b/doc/guides/rel_notes/release_21_08.rst
@@ -55,6 +55,13 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Updated Intel iavf driver.**
+
+  * Added Tx QoS VF queue TC mapping.
+
+* **Updated Intel ice driver.**
+
+  * Added Tx QoS TC bandwidth configuration in DCF.
 
 Removed Items
 -------------
-- 
2.17.1


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF
  2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
                     ` (6 preceding siblings ...)
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 7/7] doc: release note for ETS-based Tx QoS Ting Xu
@ 2021-07-02  3:00   ` Zhang, Qi Z
  7 siblings, 0 replies; 44+ messages in thread
From: Zhang, Qi Z @ 2021-07-02  3:00 UTC (permalink / raw)
  To: Xu, Ting, dev; +Cc: Wu, Jingjing, Xing, Beilei, Yang, Qiming



> -----Original Message-----
> From: Xu, Ting <ting.xu@intel.com>
> Sent: Thursday, July 1, 2021 6:20 PM
> To: dev@dpdk.org
> Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> Xing, Beilei <beilei.xing@intel.com>; Yang, Qiming <qiming.yang@intel.com>;
> Xu, Ting <ting.xu@intel.com>
> Subject: [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF
> 
> This patch enables the ETS-based Tx QoS for IAVF. Kernel tool is used to
> configure ETS first. DCF is used to set bandwidth limit for VFs of each TC. IAVF
> is supported to query QoS capability and set queue TC mapping.
> Traffic Management API is utilized to configure the QoS hierarchy scheduler
> tree. The scheduler tree will be passed to hardware to enable all above
> functions.
> 
> Ting Xu (7):
>   common/iavf: support ETS-based QoS offload configuration
>   net/ice/base: support DCF query port ETS adminq
>   net/ice: support DCF link status event handling
>   net/ice: support QoS config VF bandwidth in DCF
>   net/iavf: query QoS cap and set queue TC mapping
>   net/iavf: check Tx packet with correct UP and queue
>   doc: release note for ETS-based Tx QoS
> 
>  doc/guides/rel_notes/release_21_08.rst |   7 +
>  drivers/common/iavf/iavf_type.h        |   2 +
>  drivers/common/iavf/virtchnl.h         | 131 +++++
>  drivers/net/iavf/iavf.h                |  56 ++
>  drivers/net/iavf/iavf_ethdev.c         |  34 ++
>  drivers/net/iavf/iavf_rxtx.c           |  43 ++
>  drivers/net/iavf/iavf_tm.c             | 737 ++++++++++++++++++++++++
>  drivers/net/iavf/iavf_vchnl.c          |  56 +-
>  drivers/net/iavf/meson.build           |   1 +
>  drivers/net/ice/base/ice_dcb.c         |   3 +-
>  drivers/net/ice/ice_dcf.c              |   9 +-
>  drivers/net/ice/ice_dcf.h              |  54 ++
>  drivers/net/ice/ice_dcf_ethdev.c       |  68 ++-
>  drivers/net/ice/ice_dcf_ethdev.h       |   3 +
>  drivers/net/ice/ice_dcf_parent.c       |  81 +++
>  drivers/net/ice/ice_dcf_sched.c        | 759
> +++++++++++++++++++++++++
>  drivers/net/ice/meson.build            |   3 +-
>  17 files changed, 2040 insertions(+), 7 deletions(-)  create mode 100644
> drivers/net/iavf/iavf_tm.c  create mode 100644
> drivers/net/ice/ice_dcf_sched.c
> 
> --
> 2.17.1

Acked-by: Qi Zhang <qi.z.zhang@intel.com>

Applied to dpdk-next-net-intel.

Thanks
Qi


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [dpdk-dev] [PATCH v5 4/7] net/ice: support QoS config VF bandwidth in DCF
  2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 4/7] net/ice: support QoS config VF bandwidth in DCF Ting Xu
@ 2021-07-07  9:23     ` Thomas Monjalon
  0 siblings, 0 replies; 44+ messages in thread
From: Thomas Monjalon @ 2021-07-07  9:23 UTC (permalink / raw)
  To: qi.z.zhang, Ting Xu
  Cc: dev, jingjing.wu, beilei.xing, qiming.yang, ferruh.yigit,
	bruce.richardson, david.marchand, aconole, alialnu, rasland

01/07/2021 12:20, Ting Xu:
> This patch supports the ETS-based QoS configuration. It enables the DCF
> to configure bandwidth limits for each VF VSI of different TCs. A
> hierarchy scheduler tree is built with port, TC and VSI nodes.
> 
> Signed-off-by: Qiming Yang <qiming.yang@intel.com>
> Signed-off-by: Ting Xu <ting.xu@intel.com>

This patch has been merged without checking the CI report:
http://mails.dpdk.org/archives/test-report/2021-July/201905.html

As a result, the compilation is now failing in the main branch on RHEL.
Please fix it today, it is blocking the -rc1.




^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2021-07-07  9:23 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-01  1:40 [dpdk-dev] [PATCH v1 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 1/5] common/iavf: add support for ETS-based Tx QoS Ting Xu
2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 2/5] net/ice/base: support DCF query port ETS adminq Ting Xu
2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 3/5] net/ice: support DCF link status event handling Ting Xu
2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 4/5] net/ice: support QoS config VF bandwidth in DCF Ting Xu
2021-06-01  1:40 ` [dpdk-dev] [PATCH v1 5/5] net/iavf: query QoS cap and set queue TC mapping Ting Xu
2021-06-17 10:17 ` [dpdk-dev] [PATCH v2 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 1/5] common/iavf: support ETS-based QoS offload configuration Ting Xu
2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 2/5] net/ice/base: support DCF query port ETS adminq Ting Xu
2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 3/5] net/ice: support DCF link status event handling Ting Xu
2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 4/5] net/ice: support QoS config VF bandwidth in DCF Ting Xu
2021-06-17 10:17   ` [dpdk-dev] [PATCH v2 5/5] net/iavf: query QoS cap and set queue TC mapping Ting Xu
2021-06-25  9:31 ` [dpdk-dev] [PATCH v3 0/5] Enable ETS-based Tx QoS for VF in DCF Ting Xu
2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 1/5] common/iavf: support ETS-based QoS offload configuration Ting Xu
2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 2/5] net/ice/base: support DCF query port ETS adminq Ting Xu
2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 3/5] net/ice: support DCF link status event handling Ting Xu
2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 4/5] net/ice: support QoS config VF bandwidth in DCF Ting Xu
2021-06-25  9:31   ` [dpdk-dev] [PATCH v3 5/5] net/iavf: query QoS cap and set queue TC mapping Ting Xu
2021-06-30  6:53 ` [dpdk-dev] [PATCH v4 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 2/7] net/ice/base: support DCF query port ETS adminq Ting Xu
2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 3/7] net/ice: support DCF link status event handling Ting Xu
2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 4/7] net/ice: support QoS config VF bandwidth in DCF Ting Xu
2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 5/7] net/iavf: query QoS cap and set queue TC mapping Ting Xu
2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 6/7] net/iavf: check Tx packet with correct UP and queue Ting Xu
2021-06-30  6:53   ` [dpdk-dev] [PATCH v4 7/7] doc: release note for ETS-based Tx QoS Ting Xu
2021-07-01 10:20 ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Ting Xu
2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 2/7] net/ice/base: support DCF query port ETS adminq Ting Xu
2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 3/7] net/ice: support DCF link status event handling Ting Xu
2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 4/7] net/ice: support QoS config VF bandwidth in DCF Ting Xu
2021-07-07  9:23     ` Thomas Monjalon
2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 5/7] net/iavf: query QoS cap and set queue TC mapping Ting Xu
2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 6/7] net/iavf: check Tx packet with correct UP and queue Ting Xu
2021-07-01 10:20   ` [dpdk-dev] [PATCH v5 7/7] doc: release note for ETS-based Tx QoS Ting Xu
2021-07-02  3:00   ` [dpdk-dev] [PATCH v5 0/7] Enable ETS-based Tx QoS for VF in DCF Zhang, Qi Z
2021-07-01 11:41 ` Ting Xu
2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 1/7] common/iavf: support ETS-based QoS offload configuration Ting Xu
2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 2/7] net/ice/base: support DCF query port ETS adminq Ting Xu
2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 3/7] net/ice: support DCF link status event handling Ting Xu
2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 4/7] net/ice: support QoS config VF bandwidth in DCF Ting Xu
2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 5/7] net/iavf: query QoS cap and set queue TC mapping Ting Xu
2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 6/7] net/iavf: check Tx packet with correct UP and queue Ting Xu
2021-07-01 11:41   ` [dpdk-dev] [PATCH v5 7/7] doc: release note for ETS-based Tx QoS Ting Xu
