DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library
@ 2021-05-24 10:58 Liguzinski, WojciechX
  2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management Liguzinski, WojciechX
                   ` (5 more replies)
  0 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-05-24 10:58 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa

The DPDK sched library is equipped with a mechanism that secures it from the bufferbloat problem,
which is a situation in which excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for queue congestion control (which is designed
to control the queue length but does not control latency directly and is now being
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes the usage of a new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of the mentioned functionality includes modifying existing data structures
and adding a new set of them to the library, as well as adding PIE-related APIs.
This affects structures in the public API/ABI. That is why a deprecation notice is going
to be prepared and sent.


Liguzinski, WojciechX (3):
  sched: add pie based congestion management
  example/qos_sched: add pie support
  example/ip_pipeline: add pie support

 config/rte_config.h                      |   1 -
 drivers/net/softnic/rte_eth_softnic_tm.c |   4 +-
 examples/ip_pipeline/tmgr.c              |   4 +-
 examples/qos_sched/app_thread.c          |   1 -
 examples/qos_sched/cfg_file.c            |  82 +++++++--
 examples/qos_sched/init.c                |   5 +-
 examples/qos_sched/profile.cfg           | 196 +++++++++++++-------
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_sched.c                    | 220 +++++++++++++++++------
 lib/sched/rte_sched.h                    |  53 ++++--
 10 files changed, 411 insertions(+), 165 deletions(-)

-- 
2.17.1

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management
  2021-05-24 10:58 [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-05-24 10:58 ` Liguzinski, WojciechX
  2021-05-25  9:16   ` Morten Brørup
  2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 2/3] example/qos_sched: add pie support Liguzinski, WojciechX
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-05-24 10:58 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa

Implement PIE-based congestion management based on RFC 8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   4 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_sched.c                    | 220 +++++++++++++++++------
 lib/sched/rte_sched.h                    |  53 ++++--
 4 files changed, 210 insertions(+), 77 deletions(-)

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..bdcd05b0e6 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index cd87e688e4..a5fa8fadc8 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_cman_mode cman;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+
+static int
+rte_sched_red_config (struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->red_params[i][j].min_th |
+				 params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->red_params[i][j].wq_log2,
+				params->red_params[i][j].min_th,
+				params->red_params[i][j].max_th,
+				params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config (struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect \n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if(params->cman == RTE_SCHED_CMAN_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->cman == RTE_SCHED_CMAN_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1169,30 +1263,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					rte_sched_free_memory(port, n_subports);
-
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					return -EINVAL;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1714,20 +1789,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
+	uint32_t cman)
 #else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
+	__rte_unused uint32_t cman)
 #endif
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
@@ -1735,8 +1810,8 @@ rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_CMAN
+	subport->stats.n_pkts_cman_dropped[tc_index] += cman;
 #endif
 }
 
@@ -1752,18 +1827,18 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
+	uint32_t cman)
 #else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
+	__rte_unused uint32_t cman)
 #endif
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
@@ -1771,39 +1846,50 @@ rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += cman;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1811,14 +1897,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
+
+	/* Update queue length */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue (pie, pkt_len, time);
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1829,7 +1930,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1925,7 +2026,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2398,6 +2499,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2417,15 +2519,19 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_CMAN
+	if (subport->cman == RTE_SCHED_CMAN_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..692aba9442 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion management mode */
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_CMAN
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_CMAN
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 #endif
 
 	/** Bytes successfully written */
-- 
2.17.1

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH 2/3] example/qos_sched: add pie support
  2021-05-24 10:58 [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management Liguzinski, WojciechX
@ 2021-05-24 10:58 ` Liguzinski, WojciechX
  2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 3/3] example/ip_pipeline: " Liguzinski, WojciechX
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-05-24 10:58 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa

This patch adds support for enabling PIE or RED by
parsing the config file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |   5 +-
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 5 files changed, 199 insertions(+), 86 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..5a39e32269 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_mode = RTE_SCHED_CMAN_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threashold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+			subport_params[i].cman = cman_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].cman == RTE_SCHED_CMAN_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].red_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].red_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].red_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].red_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+						subport_params[i].pie_params[j].qdelay_ref =
+							pie_params[j].qdelay_ref;
+						subport_params[i].pie_params[j].dp_update_interval =
+							pie_params[j].dp_update_interval;
+						subport_params[i].pie_params[j].max_burst =
+							pie_params[j].max_burst;
+						subport_params[i].pie_params[j].tailq_th =
+							pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..b1babc2276 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -212,7 +212,8 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+	.cman = RTE_SCHED_CMAN_WRED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +280,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.17.1

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH 3/3] example/ip_pipeline: add pie support
  2021-05-24 10:58 [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management Liguzinski, WojciechX
  2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 2/3] example/qos_sched: add pie support Liguzinski, WojciechX
@ 2021-05-24 10:58 ` Liguzinski, WojciechX
  2021-05-24 16:19 ` [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library Stephen Hemminger
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-05-24 10:58 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..406184e760 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,7 +25,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 .red_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.17.1

--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library
  2021-05-24 10:58 [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
                   ` (2 preceding siblings ...)
  2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 3/3] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-05-24 16:19 ` Stephen Hemminger
  2021-05-25  8:56 ` Morten Brørup
  2021-06-09 10:53 ` [dpdk-dev] [RFC PATCH v1 " Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Stephen Hemminger @ 2021-05-24 16:19 UTC (permalink / raw)
  To: Liguzinski, WojciechX
  Cc: dev, jasvinder.singh, cristian.dumitrescu, savinay.dharmappa

On Mon, 24 May 2021 11:58:19 +0100
"Liguzinski, WojciechX" <wojciechx.liguzinski@intel.com> wrote:

> DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
> which is a situation when excess buffers in the network cause high latency and latency 
> variation. Currently, it supports RED for queue congestion control (which is designed 
> to control the queue length but it does not control latency directly and is now being 
> obsoleted ). However, more advanced queue management is required to address this problem
> and provide desirable quality of service to users.
> 
> This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
> controller Enhanced) that can effectively and directly control queuing latency to address 
> the bufferbloat problem.
> 
> The implementation of mentioned functionality includes modification of existing and 
> adding a new set of data structures to the library, adding PIE related APIs. 
> This affects structures in public API/ABI. That is why deprecation notice is going
> to be prepared and sent.
> 
> 
> Liguzinski, WojciechX (3):
>   sched: add pie based congestion management
>   example/qos_sched: add pie support
>   example/ip_pipeline: add pie support
> 
>  config/rte_config.h                      |   1 -
>  drivers/net/softnic/rte_eth_softnic_tm.c |   4 +-
>  examples/ip_pipeline/tmgr.c              |   4 +-
>  examples/qos_sched/app_thread.c          |   1 -
>  examples/qos_sched/cfg_file.c            |  82 +++++++--
>  examples/qos_sched/init.c                |   5 +-
>  examples/qos_sched/profile.cfg           | 196 +++++++++++++-------
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_sched.c                    | 220 +++++++++++++++++------
>  lib/sched/rte_sched.h                    |  53 ++++--
>  10 files changed, 411 insertions(+), 165 deletions(-)

What about FQ codel which is more widely deployed, has less configuration?

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library
  2021-05-24 10:58 [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
                   ` (3 preceding siblings ...)
  2021-05-24 16:19 ` [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library Stephen Hemminger
@ 2021-05-25  8:56 ` Morten Brørup
  2021-06-07 13:01   ` Liguzinski, WojciechX
  2021-06-09 10:53 ` [dpdk-dev] [RFC PATCH v1 " Liguzinski, WojciechX
  5 siblings, 1 reply; 178+ messages in thread
From: Morten Brørup @ 2021-05-25  8:56 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, jasvinder.singh, cristian.dumitrescu
  Cc: savinay.dharmappa

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski,
> WojciechX
> Sent: Monday, 24 May 2021 12.58
> 
> DPDK sched library is equipped with mechanism that secures it from the
> bufferbloat problem
> which is a situation when excess buffers in the network cause high
> latency and latency
> variation. Currently, it supports RED for queue congestion control

The correct term is "active queue management", not "queue congestion control".

> (which is designed
> to control the queue length but it does not control latency directly
> and is now being
> obsoleted ).

Some might prefer other algorithms, such as PIE, CoDel, CAKE, etc., but RED is not obsolete!

> However, more advanced queue management is required to
> address this problem
> and provide desirable quality of service to users.
> 
> This solution (RFC) proposes usage of new algorithm called "PIE"
> (Proportional Integral
> controller Enhanced) that can effectively and directly control queuing
> latency to address
> the bufferbloat problem.
> 
> The implementation of mentioned functionality includes modification of
> existing and
> adding a new set of data structures to the library, adding PIE related
> APIs.
> This affects structures in public API/ABI. That is why deprecation
> notice is going
> to be prepared and sent.
> 
> 
> Liguzinski, WojciechX (3):
>   sched: add pie based congestion management
>   example/qos_sched: add pie support
>   example/ip_pipeline: add pie support

It's "PIE", not "pie". :-)

Nonetheless, the RFC looks good!

-Morten


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management
  2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management Liguzinski, WojciechX
@ 2021-05-25  9:16   ` Morten Brørup
  2021-06-09  8:36     ` Liguzinski, WojciechX
  0 siblings, 1 reply; 178+ messages in thread
From: Morten Brørup @ 2021-05-25  9:16 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, jasvinder.singh, cristian.dumitrescu
  Cc: savinay.dharmappa

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski,
> WojciechX
> Sent: Monday, 24 May 2021 12.58
> 
> Implement pie based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   4 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_sched.c                    | 220 +++++++++++++++++------
>  lib/sched/rte_sched.h                    |  53 ++++--
>  4 files changed, 210 insertions(+), 77 deletions(-)

Please use the abbreviation AQM instead of CMAN in the source code. This applies to the RTE_SCHED_CMAN definition, as well as functions, enums and variable names.

> +#ifdef RTE_SCHED_CMAN
> +
> +static int
> +rte_sched_red_config (struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +
> +		uint32_t j;
> +
> +		for (j = 0; j < RTE_COLORS; j++) {
> +			/* if min/max are both zero, then RED is disabled */
> +			if ((params->red_params[i][j].min_th |
> +				 params->red_params[i][j].max_th) == 0) {
> +				continue;
> +			}
> +
> +			if (rte_red_config_init(&s->red_config[i][j],
> +				params->red_params[i][j].wq_log2,
> +				params->red_params[i][j].min_th,
> +				params->red_params[i][j].max_th,
> +				params->red_params[i][j].maxp_inv) != 0) {
> +				rte_sched_free_memory(port, n_subports);
> +
> +				RTE_LOG(NOTICE, SCHED,
> +				"%s: RED configuration init fails\n",
> __func__);
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	s->cman = RTE_SCHED_CMAN_WRED;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_pie_config (struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE tailq threshold incorrect \n", __func__);
> +			return -EINVAL;
> +		}
> +
> +		if (rte_pie_config_init(&s->pie_config[i],
> +			params->pie_params[i].qdelay_ref,
> +			params->pie_params[i].dp_update_interval,
> +			params->pie_params[i].max_burst,
> +			params->pie_params[i].tailq_th) != 0) {
> +			rte_sched_free_memory(port, n_subports);
> +
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE configuration init fails\n", __func__);
> +			return -EINVAL;
> +			}
> +	}
> +	s->cman = RTE_SCHED_CMAN_PIE;
> +	return 0;
> +}

I suggest moving the two above functions from rte_sched.c to respectively rte_red.c and rte_pie.c.

> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_CMAN
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	uint32_t red)
> +	uint32_t cman)
>  #else
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> +	__rte_unused uint32_t cman)
>  #endif

Two comments:
1. __rte_unused indicates that the variable might be unused, not that it is never used. So you do not need the first variant of this function declaration.
2. I suggest using "drops" as the variable name instead of "red" or "aqm".

> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_CMAN
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	uint32_t red)
> +	uint32_t cman)
>  #else
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> +	__rte_unused uint32_t cman)
>  #endif

The above two comments also apply here.

> +static inline void
> +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
> +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> +	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
> +	struct rte_pie *pie = &qe->pie;
> +
> +	/* Update queue length */
> +	pie->qlen -= 1;
> +	pie->qlen_bytes -= pkt_len;
> +
> +	rte_pie_dequeue (pie, pkt_len, time);
>  }

Can the RED/PIE specific functions somehow move to rte_red.c and rte_pie.c without degrading performance? Perhaps function pointers are required. This prevents rte_sched.c from growing too much.

> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> 
> +/**
> + * Congestion management (CMAN) mode

"Active Queue Management (AQM) mode", please.

> + *
> + * This is used for controlling the admission of packets into a packet
> queue or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The
> *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each
> packet color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE)
> randomly
> + * drops a packet at the onset of the congestion and tries to control
> the
> + * latency around the target value. The congestion detection, however,
> is based
> + * on the queueing latency instead of the queue length like RED. For
> more
> + * information, refer RFC8033.
> + */
> +enum rte_sched_cman_mode {
> +	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED)
> */

Please stick with either the name RED or WRED, for consistency.

> +	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +


> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> Registered Number: 308263
> 
> 
> This e-mail and any attachments may contain confidential material for
> the sole
> use of the intended recipient(s). Any review or distribution by others
> is
> strictly prohibited. If you are not the intended recipient, please
> contact the
> sender and delete all copies.
> 

Please don't use this footer when sending to the DPDK mailing list.


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library
  2021-05-25  8:56 ` Morten Brørup
@ 2021-06-07 13:01   ` Liguzinski, WojciechX
  0 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-07 13:01 UTC (permalink / raw)
  To: Morten Brørup, Singh, Jasvinder, Dumitrescu, Cristian
  Cc: Dharmappa, Savinay, dev


> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com> 
> Sent: Tuesday, May 25, 2021 10:57 AM
> To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Dharmappa, Savinay <savinay.dharmappa@intel.com>
> Subject: RE: [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library
>
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski, 
> > WojciechX
> > Sent: Monday, 24 May 2021 12.58
> > 
> > DPDK sched library is equipped with mechanism that secures it from the 
> > bufferbloat problem which is a situation when excess buffers in the 
> > network cause high latency and latency variation. Currently, it 
> > supports RED for queue congestion control
>
> The correct term is "active queue management", not "queue congestion control".

Good point. I will correct the naming.

>
> > (which is designed
> > to control the queue length but it does not control latency directly 
> > and is now being obsoleted ).
>
> Some might prefer other algorithms, such as PIE, CoDel, CAKE, etc., but RED is not obsolete!

I didn't write that it is obsolete; I just shortened what is written in RFC 8033 on page 4:
"(...) AQM schemes, such as Random Early Detection
(RED) [RED] as suggested in [RFC2309] (which is now obsoleted by
[RFC7567]), have been around for well over a decade. RED is
implemented in a wide variety of network devices, both in hardware
and software. Unfortunately, due to the fact that RED needs careful
tuning of its parameters for various network conditions, most network
operators don't turn RED on. (...)"

Apologies if I wasn't precise when writing such a summary. :-)

>
> > However, more advanced queue management is required to address this 
> > problem and provide desirable quality of service to users.
> > 
> > This solution (RFC) proposes usage of new algorithm called "PIE"
> > (Proportional Integral
> > controller Enhanced) that can effectively and directly control queuing 
> > latency to address the bufferbloat problem.
> > 
> > The implementation of mentioned functionality includes modification of 
> > existing and adding a new set of data structures to the library, 
> > adding PIE related APIs.
> > This affects structures in public API/ABI. That is why deprecation 
> > notice is going to be prepared and sent.
> > 
> > 
> > Liguzinski, WojciechX (3):
> >   sched: add pie based congestion management
> >   example/qos_sched: add pie support
> >   example/ip_pipeline: add pie support
>
> It's "PIE", not "pie". :-)

Sure, I will make the proper naming corrections ;-)

>
> Nonetheless, the RFC looks good!
>
> -Morten

Thanks,
Wojciech

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management
  2021-05-25  9:16   ` Morten Brørup
@ 2021-06-09  8:36     ` Liguzinski, WojciechX
  2021-06-09 12:35       ` Morten Brørup
  0 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-09  8:36 UTC (permalink / raw)
  To: Morten Brørup, Singh, Jasvinder, Dumitrescu, Cristian
  Cc: Dharmappa, Savinay, dev, Ajmera, Megha


> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com> 
> Sent: Tuesday, May 25, 2021 11:17 AM
> To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Dharmappa, Savinay <savinay.dharmappa@intel.com>
> Subject: RE: [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management
>
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski, 
> > WojciechX
> > Sent: Monday, 24 May 2021 12.58
> > 
> > Implement pie based congestion management based on rfc8033
> > 
> > Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> > ---
> >  drivers/net/softnic/rte_eth_softnic_tm.c |   4 +-
> >  lib/sched/meson.build                    |  10 +-
> >  lib/sched/rte_sched.c                    | 220 +++++++++++++++++------
> >  lib/sched/rte_sched.h                    |  53 ++++--
> >  4 files changed, 210 insertions(+), 77 deletions(-)
>
> Please use the abbreviation AQM instead of CMAN in the source code. This applies to the RTE_SCHED_CMAN definition, as well as functions, enums and variable names.

Ok, sure, I'm going to change that where applicable.

>
> > +#ifdef RTE_SCHED_CMAN
> > +
> > +static int
> > +rte_sched_red_config (struct rte_sched_port *port,
> > +	struct rte_sched_subport *s,
> > +	struct rte_sched_subport_params *params,
> > +	uint32_t n_subports)
> > +{
> > +	uint32_t i;
> > +
> > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > +
> > +		uint32_t j;
> > +
> > +		for (j = 0; j < RTE_COLORS; j++) {
> > +			/* if min/max are both zero, then RED is disabled */
> > +			if ((params->red_params[i][j].min_th |
> > +				 params->red_params[i][j].max_th) == 0) {
> > +				continue;
> > +			}
> > +
> > +			if (rte_red_config_init(&s->red_config[i][j],
> > +				params->red_params[i][j].wq_log2,
> > +				params->red_params[i][j].min_th,
> > +				params->red_params[i][j].max_th,
> > +				params->red_params[i][j].maxp_inv) != 0) {
> > +				rte_sched_free_memory(port, n_subports);
> > +
> > +				RTE_LOG(NOTICE, SCHED,
> > +				"%s: RED configuration init fails\n",
> > __func__);
> > +				return -EINVAL;
> > +			}
> > +		}
> > +	}
> > +	s->cman = RTE_SCHED_CMAN_WRED;
> > +	return 0;
> > +}
> > +
> > +static int
> > +rte_sched_pie_config (struct rte_sched_port *port,
> > +	struct rte_sched_subport *s,
> > +	struct rte_sched_subport_params *params,
> > +	uint32_t n_subports)
> > +{
> > +	uint32_t i;
> > +
> > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> > +			RTE_LOG(NOTICE, SCHED,
> > +			"%s: PIE tailq threshold incorrect \n", __func__);
> > +			return -EINVAL;
> > +		}
> > +
> > +		if (rte_pie_config_init(&s->pie_config[i],
> > +			params->pie_params[i].qdelay_ref,
> > +			params->pie_params[i].dp_update_interval,
> > +			params->pie_params[i].max_burst,
> > +			params->pie_params[i].tailq_th) != 0) {
> > +			rte_sched_free_memory(port, n_subports);
> > +
> > +			RTE_LOG(NOTICE, SCHED,
> > +			"%s: PIE configuration init fails\n", __func__);
> > +			return -EINVAL;
> > +			}
> > +	}
> > +	s->cman = RTE_SCHED_CMAN_PIE;
> > +	return 0;
> > +}
>
> I suggest moving the two above functions from rte_sched.c to respectively rte_red.c and rte_pie.c.

rte_red.c and rte_pie.c hold functions implementing those algorithms, and they don't know anything about ports and subports. That part belongs to the scheduler implementation. Putting those methods into those files would, in my opinion, break the 'functional isolation'.

>
> > -#ifdef RTE_SCHED_RED
> > +#ifdef RTE_SCHED_CMAN
> >  static inline void
> >  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> > *port,
> >  	struct rte_sched_subport *subport,
> >  	uint32_t qindex,
> >  	struct rte_mbuf *pkt,
> > -	uint32_t red)
> > +	uint32_t cman)
> >  #else
> >  static inline void
> >  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> > *port,
> >  	struct rte_sched_subport *subport,
> >  	uint32_t qindex,
> >  	struct rte_mbuf *pkt,
> > -	__rte_unused uint32_t red)
> > +	__rte_unused uint32_t cman)
> >  #endif
>
> Two comments:
> 1. __rte_unused indicates that the variable might be unused, not that it is never used. So you do not need the first variant of this function declaration.

Thanks, it's going to be fixed.

> 2. I suggest using "drops" as the variable name instead of "red" or "aqm".

Ok, I will change that.

>
> > -#ifdef RTE_SCHED_RED
> > +#ifdef RTE_SCHED_CMAN
> >  static inline void
> >  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> > *subport,
> >  	uint32_t qindex,
> >  	struct rte_mbuf *pkt,
> > -	uint32_t red)
> > +	uint32_t cman)
> >  #else
> >  static inline void
> >  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> > *subport,
> >  	uint32_t qindex,
> >  	struct rte_mbuf *pkt,
> > -	__rte_unused uint32_t red)
> > +	__rte_unused uint32_t cman)
> >  #endif
>
> The above two comments also apply here.

Ok, it's going to be changed.

>
> > +static inline void
> > +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport, 
> > +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> > +	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
> > +	struct rte_pie *pie = &qe->pie;
> > +
> > +	/* Update queue length */
> > +	pie->qlen -= 1;
> > +	pie->qlen_bytes -= pkt_len;
> > +
> > +	rte_pie_dequeue (pie, pkt_len, time);
> >  }
>
> Can the RED/PIE specific functions somehow move to rte_red.c and rte_pie.c without degrading performance? Perhaps function pointers are required. This prevents rte_sched.c from growing too much.

Like I mentioned above, those functions use data structures known to the scheduler and not directly to those algorithms, which are implemented in their own definition files. I will try to think of a solution that could be suitable here.

>
> > diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> > 
> > +/**
> > + * Congestion management (CMAN) mode
>
> "Active Queue Management (AQM) mode", please.

Sure. ;-)

>
> > + *
> > + * This is used for controlling the admission of packets into a 
> > + packet
> > queue or
> > + * group of packet queues on congestion.
> > + *
> > + * The *Random Early Detection (RED)* algorithm works by proactively
> > dropping
> > + * more and more input packets as the queue occupancy builds up. When
> > the queue
> > + * is full or almost full, RED effectively works as *tail drop*. The
> > *Weighted
> > + * RED* algorithm uses a separate set of RED thresholds for each
> > packet color.
> > + *
> > + * Similar to RED, Proportional Integral Controller Enhanced (PIE)
> > randomly
> > + * drops a packet at the onset of the congestion and tries to control
> > the
> > + * latency around the target value. The congestion detection, 
> > + however,
> > is based
> > + * on the queueing latency instead of the queue length like RED. For
> > more
> > + * information, refer RFC8033.
> > + */
> > +enum rte_sched_cman_mode {
> > +	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED)
> > */
>
> Please stick with either the name RED or WRED, for consistency.

WRED is just an extension of RED, so I have used that name in the places where I found it suitable, and RED otherwise. I don't think it should be changed in all places, as that may be confusing.

>
> > +	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller
> > Enhanced (PIE) */
> > +};
> > +
>
>
> > --------------------------------------------------------------
> > Intel Research and Development Ireland Limited Registered in Ireland 
> > Registered Office: Collinstown Industrial Park, Leixlip, County 
> > Kildare Registered Number: 308263
> > 
> > 
> > This e-mail and any attachments may contain confidential material for 
> > the sole use of the intended recipient(s). Any review or distribution 
> > by others is strictly prohibited. If you are not the intended 
> > recipient, please contact the sender and delete all copies.
> > 
>
> Please don't use this footer when sending to the DPDK mailing list.

Footer issue has been handled.

Thanks,
Wojtek

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v1 0/3] Add PIE support for HQoS library
  2021-05-24 10:58 [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
                   ` (4 preceding siblings ...)
  2021-05-25  8:56 ` Morten Brørup
@ 2021-06-09 10:53 ` Liguzinski, WojciechX
  2021-06-09 10:53   ` [dpdk-dev] [RFC PATCH v1 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
                     ` (3 more replies)
  5 siblings, 4 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-09 10:53 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency 
variation. Currently, it supports RED for active queue management (which is designed 
to control the queue length but it does not control latency directly and is now being 
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address 
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and 
adding a new set of data structures to the library, adding PIE related APIs. 
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (3):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support

 config/rte_config.h                      |   1 -
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 examples/ip_pipeline/tmgr.c              |   6 +-
 examples/qos_sched/app_thread.c          |   1 -
 examples/qos_sched/cfg_file.c            |  82 ++++-
 examples/qos_sched/init.c                |   7 +-
 examples/qos_sched/profile.cfg           | 196 ++++++++----
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  79 +++++
 lib/sched/rte_pie.h                      | 387 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 229 ++++++++++----
 lib/sched/rte_sched.h                    |  53 +++-
 12 files changed, 876 insertions(+), 181 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v1 1/3] sched: add PIE based congestion management
  2021-06-09 10:53 ` [dpdk-dev] [RFC PATCH v1 " Liguzinski, WojciechX
@ 2021-06-09 10:53   ` Liguzinski, WojciechX
  2021-06-09 10:53   ` [dpdk-dev] [RFC PATCH v1 2/3] example/qos_sched: add PIE support Liguzinski, WojciechX
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-09 10:53 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  79 +++++
 lib/sched/rte_pie.h                      | 387 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 229 ++++++++++----
 lib/sched/rte_sched.h                    |  53 +++-
 6 files changed, 673 insertions(+), 91 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..5b6c4e6d4b 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->wred_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..9ac940ae7f
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+/**
+ * @brief Initialise PIE run-time data to a clean, inactive state.
+ *
+ * Every counter the enqueue/dequeue paths mutate is reset here,
+ * including qlen_bytes which the original version missed.
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error (NULL pointer)
+ */
+int
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL)
+		return -1;
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->qlen_bytes = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+
+	return 0;
+}
+
+/**
+ * @brief Configure a PIE parameter structure, converting user values
+ * (milliseconds) into CPU cycles via the TSC frequency.
+ *
+ * All inputs are unsigned 16-bit values, so the only invalid value is
+ * zero; the original "<= 0" tests were misleading on unsigned types.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in] latency target (milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Convert milliseconds to CPU cycles: cycles = (hz * ms) / 1000 */
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..084d86893b
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,387 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14) to start measurement cycle (bytes) */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by the user.
+ *
+ * Millisecond values are converted to CPU cycles by rte_pie_config_init().
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters in internal units.
+ *
+ * Produced from struct rte_pie_params by rte_pie_config_init(); time
+ * values are stored in CPU cycles for direct comparison with the TSC.
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;            /**< Latency Target (in CPU cycles) */
+	uint64_t dp_update_interval;    /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;             /**< Max Burst Allowance (in CPU cycles) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data, one instance per queue.
+ */
+struct rte_pie {
+	uint16_t active;                /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;        /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count;  /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;     /**< Time the measurement cycle started (in cpu cycles) */
+	uint64_t last_measurement;      /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                  /**< Queue length (packets count) */
+	uint64_t qlen_bytes;            /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;           /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;       /**< Current burst allowance (in cpu cycles) */
+	uint64_t qdelay_old;            /**< Old queue delay (in cpu cycles) */
+	double drop_prob;               /**< Current packet drop probability */
+	double accu_prob;               /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in] latency target (milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is an integer, not a pointer: assert a non-zero length
+	 * instead of the original (invalid) comparison against NULL.
+	 */
+	RTE_ASSERT(pkt_len > 0);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* If the queue has been idle for a while,
+	 * turn off PIE and reset counters.
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Periodically re-computes the packet drop probability (RFC 8033)
+ *
+ * Called from the enqueue path once per dp_update_interval: estimates the
+ * current queue delay, applies the proportional-integral adjustment to
+ * drop_prob and decays the remaining burst allowance.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	/* Proportional term (deviation from the latency target) plus
+	 * integral term (delay trend since the previous update).
+	 */
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	/* Auto-tuning: scale the adjustment down while drop_prob is small */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	/* Cap the per-update increase once drop_prob is already high */
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Decay burst allowance, saturating at zero. The original unsigned
+	 * subtraction could wrap below zero, which made the "> 0" test
+	 * always true and left a huge (never-expiring) burst allowance.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief Makes the random drop decision based on the current drop probability
+ *
+ * Implements the RFC 8033 safeguards: no drops while the queue is
+ * uncongested, and the accumulated-probability bounds that prevent both
+ * too-frequent and too-rare drops.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	/* Safeguard: avoid dropping again too soon after a recent drop */
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	/* Safeguard: force a drop when too many packets slipped through */
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Uniform random value in [0, 1) compared against drop_prob */
+	rand_value = (double) rte_rand()/RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued.
+		 * A non-zero burst allowance suppresses probabilistic drops.
+		 */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] current queue length (packets); selects the empty/non-empty path
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on the tail drop threshold
+ * @retval 2 drop the packet based on the drop probability criteria
+ */
+static inline int
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0) {
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	} else {
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+	}
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure to keep a running average of the
+ * per-RTE_DQ_THRESHOLD-bytes dequeue time (avg_dq_time).
+ *
+ * @param pie [in,out] data pointer to PIE runtime data (updated here)
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Close the cycle once RTE_DQ_THRESHOLD bytes have departed */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			/* Exponentially weighted moving average of dq_time */
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time * (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index cd87e688e4..afda39caf5 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_aqm_mode aqm;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_AQM
+
+/* Initialise the per-TC, per-color WRED configs of a subport from the
+ * user parameters; a zero min/max threshold pair disables RED for that
+ * (TC, color). Frees the port memory and returns -EINVAL on failure.
+ */
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->wred_params[i][j].min_th |
+				 params->wred_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->wred_config[i][j],
+				params->wred_params[i][j].wq_log2,
+				params->wred_params[i][j].min_th,
+				params->wred_params[i][j].max_th,
+				params->wred_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_WRED;
+	return 0;
+}
+
+/* Initialise the per-TC PIE configs of a subport from the user
+ * parameters. Frees the port memory and returns -EINVAL on any failure,
+ * mirroring the RED configuration path.
+ */
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		/* The tail drop threshold cannot exceed the queue size */
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			/* Free memory on this error path too; the original
+			 * returned without freeing, unlike every other
+			 * failure case.
+			 */
+			rte_sched_free_memory(port, n_subports);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_PIE;
+	return 0;
+}
+
+/* Dispatch subport AQM configuration to the algorithm selected in the
+ * user parameters (WRED or PIE); unknown modes are rejected.
+ */
+static int
+rte_sched_aqm_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	switch (params->aqm) {
+	case RTE_SCHED_AQM_WRED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_AQM_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1169,30 +1263,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					rte_sched_free_memory(port, n_subports);
-
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					return -EINVAL;
-				}
-			}
+#ifdef RTE_SCHED_AQM
+		status = rte_sched_aqm_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1714,29 +1789,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_AQM
+	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
 #endif
 }
 
@@ -1752,58 +1818,61 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_AQM
+	qe->stats.n_pkts_aqm_dropped += drops;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->wred_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1811,14 +1880,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red *red = &qe->red;
+
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
 
-	rte_red_mark_queue_empty(red, port->time);
+	/* Update queue length */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1829,7 +1913,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1925,7 +2009,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2398,6 +2482,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2417,15 +2502,19 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_AQM
+	if (subport->aqm == RTE_SCHED_AQM_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..a5fe6266cd 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Active Queue Management */
+#ifdef RTE_SCHED_AQM
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Active Queue Management (AQM) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_aqm_mode {
+	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/** Active Queue Management mode */
+	enum rte_sched_aqm_mode aqm;
+
+	RTE_STD_C11
+	union {
+		/** WRED parameters */
+		struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_AQM
+	/** Number of packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_AQM
+	/** Packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped;
 #endif
 
 	/** Bytes successfully written */
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v1 2/3] example/qos_sched: add PIE support
  2021-06-09 10:53 ` [dpdk-dev] [RFC PATCH v1 " Liguzinski, WojciechX
  2021-06-09 10:53   ` [dpdk-dev] [RFC PATCH v1 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-06-09 10:53   ` Liguzinski, WojciechX
  2021-06-09 10:53   ` [dpdk-dev] [RFC PATCH v1 3/3] example/ip_pipeline: " Liguzinski, WojciechX
  2021-06-15  9:01   ` [dpdk-dev] [RFC PATCH v2 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  3 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-09 10:53 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

This patch adds support for enabling either PIE or RED congestion
management, selected by parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |   7 +-
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 5 files changed, 200 insertions(+), 87 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..657763ca90 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	enum rte_sched_aqm_mode aqm_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		aqm_mode = RTE_SCHED_AQM_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		aqm_mode = RTE_SCHED_AQM_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_AQM */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
+			subport_params[i].aqm = aqm_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].aqm == RTE_SCHED_AQM_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].wred_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].wred_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].wred_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].wred_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].pie_params[j].qdelay_ref =
+						pie_params[j].qdelay_ref;
+					subport_params[i].pie_params[j].dp_update_interval =
+						pie_params[j].dp_update_interval;
+					subport_params[i].pie_params[j].max_burst =
+						pie_params[j].max_burst;
+					subport_params[i].pie_params[j].tailq_th =
+						pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..96ba3b6616 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -212,8 +212,9 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
-	.red_params = {
+#ifdef RTE_SCHED_AQM
+	.aqm = RTE_SCHED_AQM_WRED,
+	.wred_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +280,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 	},
 };
 
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v1 3/3] example/ip_pipeline: add PIE support
  2021-06-09 10:53 ` [dpdk-dev] [RFC PATCH v1 " Liguzinski, WojciechX
  2021-06-09 10:53   ` [dpdk-dev] [RFC PATCH v1 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-06-09 10:53   ` [dpdk-dev] [RFC PATCH v1 2/3] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-06-09 10:53   ` Liguzinski, WojciechX
  2021-06-15  9:01   ` [dpdk-dev] [RFC PATCH v2 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  3 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-09 10:53 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..73da2da870 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,8 +25,8 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
+#ifdef RTE_SCHED_AQM
+.wred_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management
  2021-06-09  8:36     ` Liguzinski, WojciechX
@ 2021-06-09 12:35       ` Morten Brørup
  0 siblings, 0 replies; 178+ messages in thread
From: Morten Brørup @ 2021-06-09 12:35 UTC (permalink / raw)
  To: Liguzinski, WojciechX, Singh, Jasvinder, Dumitrescu, Cristian
  Cc: Dharmappa, Savinay, dev, Ajmera, Megha

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski,
> WojciechX
> Sent: Wednesday, 9 June 2021 10.37
> 
> > From: Morten Brørup <mb@smartsharesystems.com>
> > Sent: Tuesday, May 25, 2021 11:17 AM
> >
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski,
> > > WojciechX
> > > Sent: Monday, 24 May 2021 12.58
> > >
> > > Implement pie based congestion management based on rfc8033
> > >
> > > Signed-off-by: Liguzinski, WojciechX
> <wojciechx.liguzinski@intel.com>
> > > ---
> > >  drivers/net/softnic/rte_eth_softnic_tm.c |   4 +-
> > >  lib/sched/meson.build                    |  10 +-
> > >  lib/sched/rte_sched.c                    | 220 +++++++++++++++++--
> ----
> > >  lib/sched/rte_sched.h                    |  53 ++++--
> > >  4 files changed, 210 insertions(+), 77 deletions(-)
> >
> > Please use the abbreviation AQM instead of CMAN in the source code.
> This applies to the RTE_SCHED_CMAN definition, as well as functions,
> enums and variable names.
> 
> Ok, sure, I'm going to change that where applicable.
> 
> >
> > > +#ifdef RTE_SCHED_CMAN
> > > +
> > > +static int
> > > +rte_sched_red_config (struct rte_sched_port *port,
> > > +	struct rte_sched_subport *s,
> > > +	struct rte_sched_subport_params *params,
> > > +	uint32_t n_subports)
> > > +{
> > > +	uint32_t i;
> > > +
> > > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > > +
> > > +		uint32_t j;
> > > +
> > > +		for (j = 0; j < RTE_COLORS; j++) {
> > > +			/* if min/max are both zero, then RED is disabled */
> > > +			if ((params->red_params[i][j].min_th |
> > > +				 params->red_params[i][j].max_th) == 0) {
> > > +				continue;
> > > +			}
> > > +
> > > +			if (rte_red_config_init(&s->red_config[i][j],
> > > +				params->red_params[i][j].wq_log2,
> > > +				params->red_params[i][j].min_th,
> > > +				params->red_params[i][j].max_th,
> > > +				params->red_params[i][j].maxp_inv) != 0) {
> > > +				rte_sched_free_memory(port, n_subports);
> > > +
> > > +				RTE_LOG(NOTICE, SCHED,
> > > +				"%s: RED configuration init fails\n",
> > > __func__);
> > > +				return -EINVAL;
> > > +			}
> > > +		}
> > > +	}
> > > +	s->cman = RTE_SCHED_CMAN_WRED;
> > > +	return 0;
> > > +}
> > > +
> > > +static int
> > > +rte_sched_pie_config (struct rte_sched_port *port,
> > > +	struct rte_sched_subport *s,
> > > +	struct rte_sched_subport_params *params,
> > > +	uint32_t n_subports)
> > > +{
> > > +	uint32_t i;
> > > +
> > > +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> > > +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> > > +			RTE_LOG(NOTICE, SCHED,
> > > +			"%s: PIE tailq threshold incorrect \n", __func__);
> > > +			return -EINVAL;
> > > +		}
> > > +
> > > +		if (rte_pie_config_init(&s->pie_config[i],
> > > +			params->pie_params[i].qdelay_ref,
> > > +			params->pie_params[i].dp_update_interval,
> > > +			params->pie_params[i].max_burst,
> > > +			params->pie_params[i].tailq_th) != 0) {
> > > +			rte_sched_free_memory(port, n_subports);
> > > +
> > > +			RTE_LOG(NOTICE, SCHED,
> > > +			"%s: PIE configuration init fails\n", __func__);
> > > +			return -EINVAL;
> > > +			}
> > > +	}
> > > +	s->cman = RTE_SCHED_CMAN_PIE;
> > > +	return 0;
> > > +}
> >
> > I suggest moving the two above functions from rte_sched.c to
> respectively rte_red.c and rte_pie.c.
> 
> rte_red.c and rte_pie.c hold functions implementing those algorithms
> and they don't know anything about ports and subports. That part refers
> to scheduler implementation. Putting those methods respectively to
> those files would in my opinion break the 'functional isolation'.
> 

Then it makes sense keeping them here. You can ignore my suggestion.

> >
> > > -#ifdef RTE_SCHED_RED
> > > +#ifdef RTE_SCHED_CMAN
> > >  static inline void
> > >  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> > > *port,
> > >  	struct rte_sched_subport *subport,
> > >  	uint32_t qindex,
> > >  	struct rte_mbuf *pkt,
> > > -	uint32_t red)
> > > +	uint32_t cman)
> > >  #else
> > >  static inline void
> > >  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> > > *port,
> > >  	struct rte_sched_subport *subport,
> > >  	uint32_t qindex,
> > >  	struct rte_mbuf *pkt,
> > > -	__rte_unused uint32_t red)
> > > +	__rte_unused uint32_t cman)
> > >  #endif
> >
> > Two comments:
> > 1. __rte_unused indicates that the variable might be unused, not that
> it is never used. So you do not need the first variant of this function
> declaration.
> 
> Thanks, it's going to be fixed.
> 
> > 2. I suggest using "drops" as the variable name instead of "red" or
> "aqm".
> 
> Ok, I will change that.
> 
> >
> > > -#ifdef RTE_SCHED_RED
> > > +#ifdef RTE_SCHED_CMAN
> > >  static inline void
> > >  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> > > *subport,
> > >  	uint32_t qindex,
> > >  	struct rte_mbuf *pkt,
> > > -	uint32_t red)
> > > +	uint32_t cman)
> > >  #else
> > >  static inline void
> > >  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> > > *subport,
> > >  	uint32_t qindex,
> > >  	struct rte_mbuf *pkt,
> > > -	__rte_unused uint32_t red)
> > > +	__rte_unused uint32_t cman)
> > >  #endif
> >
> > The above two comments also apply here.
> 
> Ok, it's going to be changed.
> 
> >
> > > +static inline void
> > > +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
> > > +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> > > +	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
> > > +	struct rte_pie *pie = &qe->pie;
> > > +
> > > +	/* Update queue length */
> > > +	pie->qlen -= 1;
> > > +	pie->qlen_bytes -= pkt_len;
> > > +
> > > +	rte_pie_dequeue (pie, pkt_len, time);
> > >  }
> >
> > Can the RED/PIE specific functions somehow move to rte_red.c and
> rte_pie.c without degrading performance? Perhaps function pointers are
> required. This prevents rte_sched.c from growing too much.
> 
> Like I mentioned above, those functions use data structures known to
> scheduler and not directly to those algorithms which are implemented in
> those definition files. I will try think of a solution that could be
> suitable here.

Now that I understand your line of thinking, I agree with you. You can ignore my comment here too.

> 
> >
> > > diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> > >
> > > +/**
> > > + * Congestion management (CMAN) mode
> >
> > "Active Queue Management (AQM) mode", please.
> 
> Sure. ;-)
> 
> >
> > > + *
> > > + * This is used for controlling the admission of packets into a
> > > + packet
> > > queue or
> > > + * group of packet queues on congestion.
> > > + *
> > > + * The *Random Early Detection (RED)* algorithm works by
> proactively
> > > dropping
> > > + * more and more input packets as the queue occupancy builds up.
> When
> > > the queue
> > > + * is full or almost full, RED effectively works as *tail drop*.
> The
> > > *Weighted
> > > + * RED* algorithm uses a separate set of RED thresholds for each
> > > packet color.
> > > + *
> > > + * Similar to RED, Proportional Integral Controller Enhanced (PIE)
> > > randomly
> > > + * drops a packet at the onset of the congestion and tries to
> control
> > > the
> > > + * latency around the target value. The congestion detection,
> > > + however,
> > > is based
> > > + * on the queueing latency instead of the queue length like RED.
> For
> > > more
> > > + * information, refer RFC8033.
> > > + */
> > > +enum rte_sched_cman_mode {
> > > +	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED)
> > > */
> >
> > Please stick with either the name RED or WRED, for consistency.
> 
> WRED is just an extension of RED so in places where I found that it is
> suitable I have used such naming, otherwise RED. I think it shouldn't
> be changed in all places as it may be confusing.
> 

I don't have a strong opinion about this, and you are putting some thoughts into it, so I'm happy with that.

> >
> > > +	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller
> > > Enhanced (PIE) */
> > > +};
> > > +
> >

[snip]

> Footer issue has been handled.
> 
> Thanks,
> Wojtek

:-)


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v2 0/3] Add PIE support for HQoS library
  2021-06-09 10:53 ` [dpdk-dev] [RFC PATCH v1 " Liguzinski, WojciechX
                     ` (2 preceding siblings ...)
  2021-06-09 10:53   ` [dpdk-dev] [RFC PATCH v1 3/3] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-06-15  9:01   ` Liguzinski, WojciechX
  2021-06-15  9:01     ` [dpdk-dev] [RFC PATCH v2 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
                       ` (3 more replies)
  3 siblings, 4 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-15  9:01 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

The DPDK sched library is equipped with a mechanism that protects it from the bufferbloat problem,
which is a situation in which excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management (which is designed
to control the queue length, but it does not control latency directly and is now being
obsoleted). However, more advanced queue management is required to address this problem
and provide the desired quality of service to users.

This solution (RFC) proposes the use of a new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of the mentioned functionality includes modifying existing data structures
and adding a new set of data structures to the library, as well as adding PIE-related APIs.
This affects structures in the public API/ABI. That is why a deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (3):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support

 config/rte_config.h                      |   1 -
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 examples/ip_pipeline/tmgr.c              |   6 +-
 examples/qos_sched/app_thread.c          |   1 -
 examples/qos_sched/cfg_file.c            |  82 ++++-
 examples/qos_sched/init.c                |   7 +-
 examples/qos_sched/profile.cfg           | 196 ++++++++----
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  78 +++++
 lib/sched/rte_pie.h                      | 389 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 229 +++++++++----
 lib/sched/rte_sched.h                    |  53 ++-
 12 files changed, 877 insertions(+), 181 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v2 1/3] sched: add PIE based congestion management
  2021-06-15  9:01   ` [dpdk-dev] [RFC PATCH v2 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-06-15  9:01     ` Liguzinski, WojciechX
  2021-06-15  9:01     ` [dpdk-dev] [RFC PATCH v2 2/3] example/qos_sched: add PIE support Liguzinski, WojciechX
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-15  9:01 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  78 +++++
 lib/sched/rte_pie.h                      | 389 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 229 +++++++++----
 lib/sched/rte_sched.h                    |  53 ++-
 6 files changed, 674 insertions(+), 91 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..5b6c4e6d4b 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->wred_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..f538dda21d
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+int
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL)
+		return -1;
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+
+	return 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..a0059aad04
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+									*to start measurement cycle (bytes)
+									*/
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_min_th [in] min tail drop threshold for the queue (number of packets)
+ * @param tailq_max_th [in] max tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	/* De-randomization (RFC 8033): avoid dropping too close together or
+	 * too far apart.
+	 */
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Draw a uniform random number in [0, 1]. The quotient must be
+	 * computed in floating point: rte_rand() returns a 64-bit integer,
+	 * so integer division by RTE_RAND_MAX would always yield 0 (or 1)
+	 * and the comparison against drop_prob would be meaningless.
+	 */
+	double rand_value = (double)rte_rand() / RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether the packet shall be dropped or enqueued.
+		 * Check the burst allowance first: while it is non-zero no
+		 * packet may be dropped (RFC 8033 section 4.1), and skipping
+		 * _rte_pie_drop() also keeps accu_prob from accumulating
+		 * during the allowed burst, which would otherwise force a
+		 * drop immediately after the burst window expires.
+		 */
+		if (pie->burst_allowance == 0 && _rte_pie_drop(pie_cfg, pie))
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* When queue has been idle for a while, turn off PIE and reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] current queue length in packets; NOTE(review): qlen
+ *        precedes pkt_len in the parameter list — verify call sites pass
+ *        the arguments in this order
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* An empty queue takes the lightweight path that bypasses AQM state */
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Complete the current measurement cycle once at least
+		 * RTE_DQ_THRESHOLD bytes have departed, folding the observed
+		 * drain time into an exponentially weighted moving average.
+		 */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			/* First sample seeds the average directly */
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index cd87e688e4..afda39caf5 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_aqm_mode aqm;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_AQM
+
+/**
+ * @brief Initialize per-color WRED configuration for every traffic class of
+ *        a subport from the user-supplied parameters.
+ *
+ * @param port [in] scheduler port, used for cleanup on failure
+ * @param s [in,out] subport whose wred_config table is filled in
+ * @param params [in] subport parameters carrying the WRED profiles
+ * @param n_subports [in] number of subports to release on error
+ *
+ * @return 0 on success, -EINVAL if any RED profile fails to initialize
+ *         (all subport memory is freed on failure)
+ */
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->wred_params[i][j].min_th |
+				 params->wred_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->wred_config[i][j],
+				params->wred_params[i][j].wq_log2,
+				params->wred_params[i][j].min_th,
+				params->wred_params[i][j].max_th,
+				params->wred_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	/* All profiles valid: record WRED as this subport's active AQM mode */
+	s->aqm = RTE_SCHED_AQM_WRED;
+	return 0;
+}
+
+/**
+ * @brief Initialize PIE configuration for every traffic class of a subport
+ *        from the user-supplied parameters.
+ *
+ * @param port [in] scheduler port, used for cleanup on failure
+ * @param s [in,out] subport whose pie_config table is filled in
+ * @param params [in] subport parameters carrying the PIE profiles
+ * @param n_subports [in] number of subports to release on error
+ *
+ * @return 0 on success, -EINVAL on invalid tail drop threshold or PIE init
+ *         failure (all subport memory is freed on failure)
+ */
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		/* Tail drop threshold must not exceed the queue size */
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			/* Free allocated memory on this error path too, for
+			 * consistency with the init-failure path below and
+			 * with rte_sched_red_config().
+			 */
+			rte_sched_free_memory(port, n_subports);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	/* All profiles valid: record PIE as this subport's active AQM mode */
+	s->aqm = RTE_SCHED_AQM_PIE;
+	return 0;
+}
+
+/**
+ * @brief Dispatch subport AQM initialization to the configured scheme.
+ *
+ * @param port [in] scheduler port
+ * @param s [in,out] subport being configured
+ * @param params [in] subport parameters selecting the AQM mode
+ * @param n_subports [in] number of subports, forwarded for error cleanup
+ *
+ * @return 0 on success, -EINVAL on unknown AQM mode or configuration failure
+ */
+static int
+rte_sched_aqm_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	switch (params->aqm) {
+	case RTE_SCHED_AQM_WRED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_AQM_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1169,30 +1263,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					rte_sched_free_memory(port, n_subports);
-
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					return -EINVAL;
-				}
-			}
+#ifdef RTE_SCHED_AQM
+		status = rte_sched_aqm_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1714,29 +1789,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_AQM
+	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
 #endif
 }
 
@@ -1752,58 +1818,61 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_AQM
+	qe->stats.n_pkts_aqm_dropped += drops;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->wred_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1811,14 +1880,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red *red = &qe->red;
+
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+/**
+ * @brief Update PIE run-time state on packet departure.
+ *
+ * @param subport [in,out] subport holding the queue's PIE state
+ * @param qindex [in] queue index within the subport
+ * @param pkt_len [in] departed packet length in bytes.
+ *        NOTE(review): the caller passes pkt->pkt_len + frame_overhead here
+ *        while the enqueue side accounts pkt->pkt_len only — confirm both
+ *        sides use the same length or qlen_bytes will drift.
+ * @param time [in] current time in cpu cycles
+ */
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+	uint32_t qindex, uint32_t pkt_len, uint64_t time)
+{
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
+
+	/* Update queue length. Guard the unsigned counters so an accounting
+	 * mismatch between enqueue and dequeue cannot wrap them around to
+	 * huge values.
+	 */
+	if (pie->qlen > 0)
+		pie->qlen -= 1;
+	if (pie->qlen_bytes >= pkt_len)
+		pie->qlen_bytes -= pkt_len;
+	else
+		pie->qlen_bytes = 0;
+
+	rte_pie_dequeue(pie, pkt_len, time);
+}
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1829,7 +1913,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1925,7 +2009,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2398,6 +2482,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2417,15 +2502,19 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_AQM
+	if (subport->aqm == RTE_SCHED_AQM_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..a5fe6266cd 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Active Queue Management */
+#ifdef RTE_SCHED_AQM
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Active Queue Management (AQM) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_aqm_mode {
+	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/** Active Queue Management mode */
+	enum rte_sched_aqm_mode aqm;
+
+	RTE_STD_C11
+	union {
+		/** WRED parameters */
+		struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_AQM
+	/** Number of packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_AQM
+	/** Packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped;
 #endif
 
 	/** Bytes successfully written */
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v2 2/3] example/qos_sched: add PIE support
  2021-06-15  9:01   ` [dpdk-dev] [RFC PATCH v2 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-06-15  9:01     ` [dpdk-dev] [RFC PATCH v2 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-06-15  9:01     ` Liguzinski, WojciechX
  2021-06-15 12:23       ` Morten Brørup
  2021-06-15  9:02     ` [dpdk-dev] [RFC PATCH v2 3/3] example/ip_pipeline: " Liguzinski, WojciechX
  2021-06-21  7:35     ` [dpdk-dev] [RFC PATCH v3 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  3 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-15  9:01 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

This patch adds support for enabling either PIE or RED congestion
management, selected by parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |   7 +-
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 5 files changed, 200 insertions(+), 87 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..657763ca90 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	enum rte_sched_aqm_mode aqm_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		aqm_mode = RTE_SCHED_AQM_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		aqm_mode = RTE_SCHED_AQM_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_AQM */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
+			subport_params[i].aqm = aqm_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].aqm == RTE_SCHED_AQM_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].wred_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].wred_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].wred_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].wred_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].pie_params[j].qdelay_ref =
+						pie_params[j].qdelay_ref;
+					subport_params[i].pie_params[j].dp_update_interval =
+						pie_params[j].dp_update_interval;
+					subport_params[i].pie_params[j].max_burst =
+						pie_params[j].max_burst;
+					subport_params[i].pie_params[j].tailq_th =
+						pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..96ba3b6616 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -212,8 +212,9 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
-	.red_params = {
+#ifdef RTE_SCHED_AQM
+	.aqm = RTE_SCHED_AQM_WRED,
+	.wred_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +280,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 	},
 };
 
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v2 3/3] example/ip_pipeline: add PIE support
  2021-06-15  9:01   ` [dpdk-dev] [RFC PATCH v2 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-06-15  9:01     ` [dpdk-dev] [RFC PATCH v2 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-06-15  9:01     ` [dpdk-dev] [RFC PATCH v2 2/3] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-06-15  9:02     ` Liguzinski, WojciechX
  2021-06-21  7:35     ` [dpdk-dev] [RFC PATCH v3 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  3 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-15  9:02 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

Add PIE support to the IP Pipeline example application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..73da2da870 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,8 +25,8 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
+#ifdef RTE_SCHED_AQM
+.wred_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v2 2/3] example/qos_sched: add PIE support
  2021-06-15  9:01     ` [dpdk-dev] [RFC PATCH v2 2/3] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-06-15 12:23       ` Morten Brørup
  0 siblings, 0 replies; 178+ messages in thread
From: Morten Brørup @ 2021-06-15 12:23 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, jasvinder.singh, cristian.dumitrescu
  Cc: savinay.dharmappa, megha.ajmera

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Liguzinski,
> WojciechX
> Sent: Tuesday, 15 June 2021 11.02

[snip]

> diff --git a/config/rte_config.h b/config/rte_config.h
> index 590903c07d..48132f27df 100644
> --- a/config/rte_config.h
> +++ b/config/rte_config.h
> @@ -89,7 +89,6 @@
>  #define RTE_MAX_LCORE_FREQS 64
> 
>  /* rte_sched defines */
> -#undef RTE_SCHED_RED

Should the above be removed, or replaced with:
#undef RTE_SCHED_AQM

>  #undef RTE_SCHED_COLLECT_STATS
>  #undef RTE_SCHED_SUBPORT_TC_OV
>  #define RTE_SCHED_PORT_N_GRINDERS 8


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v3 0/3] Add PIE support for HQoS library
  2021-06-15  9:01   ` [dpdk-dev] [RFC PATCH v2 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
                       ` (2 preceding siblings ...)
  2021-06-15  9:02     ` [dpdk-dev] [RFC PATCH v2 3/3] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-06-21  7:35     ` Liguzinski, WojciechX
  2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
                         ` (3 more replies)
  3 siblings, 4 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-21  7:35 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency 
variation. Currently, it supports RED for active queue management (which is designed 
to control the queue length but it does not control latency directly and is now being 
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address 
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and 
adding a new set of data structures to the library, adding PIE related APIs. 
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (3):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support

 config/rte_config.h                      |   1 -
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 examples/ip_pipeline/tmgr.c              |   6 +-
 examples/qos_sched/app_thread.c          |   1 -
 examples/qos_sched/cfg_file.c            |  82 ++++-
 examples/qos_sched/init.c                |   7 +-
 examples/qos_sched/profile.cfg           | 196 ++++++++----
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  78 +++++
 lib/sched/rte_pie.h                      | 388 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 229 +++++++++----
 lib/sched/rte_sched.h                    |  53 +++-
 12 files changed, 876 insertions(+), 181 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v3 1/3] sched: add PIE based congestion management
  2021-06-21  7:35     ` [dpdk-dev] [RFC PATCH v3 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-06-21  7:35       ` Liguzinski, WojciechX
  2021-06-21 18:17         ` Stephen Hemminger
  2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 2/3] example/qos_sched: add PIE support Liguzinski, WojciechX
                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-21  7:35 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  78 +++++
 lib/sched/rte_pie.h                      | 388 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 229 +++++++++----
 lib/sched/rte_sched.h                    |  53 +++-
 6 files changed, 673 insertions(+), 91 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..5b6c4e6d4b 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->wred_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..f538dda21d
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+int
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL)
+		return -1;
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+
+	return 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..9295f39c07
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,388 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target (milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index cd87e688e4..afda39caf5 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_aqm_mode aqm;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_AQM
+
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->wred_params[i][j].min_th |
+				 params->wred_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->wred_config[i][j],
+				params->wred_params[i][j].wq_log2,
+				params->wred_params[i][j].min_th,
+				params->wred_params[i][j].max_th,
+				params->wred_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->aqm = RTE_SCHED_AQM_PIE;
+	return 0;
+}
+
+static int
+rte_sched_aqm_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->aqm == RTE_SCHED_AQM_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->aqm == RTE_SCHED_AQM_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1169,30 +1263,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					rte_sched_free_memory(port, n_subports);
-
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					return -EINVAL;
-				}
-			}
+#ifdef RTE_SCHED_AQM
+		status = rte_sched_aqm_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1714,29 +1789,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_AQM
+	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
 #endif
 }
 
@@ -1752,58 +1818,61 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_AQM
+	qe->stats.n_pkts_aqm_dropped += drops;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->wred_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1811,14 +1880,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red *red = &qe->red;
+
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
 
-	rte_red_mark_queue_empty(red, port->time);
+	/* Update queue length */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1829,7 +1913,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1925,7 +2009,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2398,6 +2482,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2417,15 +2502,19 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_AQM
+	if (subport->aqm == RTE_SCHED_AQM_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..a5fe6266cd 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Active Queue Management */
+#ifdef RTE_SCHED_AQM
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Active Queue Management (AQM) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_aqm_mode {
+	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/** Active Queue Management mode */
+	enum rte_sched_aqm_mode aqm;
+
+	RTE_STD_C11
+	union {
+		/** WRED parameters */
+		struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_AQM
+	/** Number of packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_AQM
+	/** Packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped;
 #endif
 
 	/** Bytes successfully written */
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v3 2/3] example/qos_sched: add PIE support
  2021-06-21  7:35     ` [dpdk-dev] [RFC PATCH v3 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-06-21  7:35       ` Liguzinski, WojciechX
  2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 3/3] example/ip_pipeline: " Liguzinski, WojciechX
  2021-07-05  8:04       ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  3 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-21  7:35 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

This patch adds support for enabling either PIE or RED,
selected by parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |   7 +-
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 5 files changed, 200 insertions(+), 87 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..657763ca90 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	enum rte_sched_aqm_mode aqm_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		aqm_mode = RTE_SCHED_AQM_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		aqm_mode = RTE_SCHED_AQM_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_AQM */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
+			subport_params[i].aqm = aqm_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].aqm == RTE_SCHED_AQM_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].wred_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].wred_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].wred_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].wred_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].pie_params[j].qdelay_ref =
+						pie_params[j].qdelay_ref;
+					subport_params[i].pie_params[j].dp_update_interval =
+						pie_params[j].dp_update_interval;
+					subport_params[i].pie_params[j].max_burst =
+						pie_params[j].max_burst;
+					subport_params[i].pie_params[j].tailq_th =
+						pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..96ba3b6616 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -212,8 +212,9 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
-	.red_params = {
+#ifdef RTE_SCHED_AQM
+	.aqm = RTE_SCHED_AQM_WRED,
+	.wred_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +280,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 	},
 };
 
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v3 3/3] example/ip_pipeline: add PIE support
  2021-06-21  7:35     ` [dpdk-dev] [RFC PATCH v3 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 2/3] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-06-21  7:35       ` Liguzinski, WojciechX
  2021-07-05  8:04       ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  3 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-21  7:35 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

Add PIE support to the IP Pipeline example application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..73da2da870 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,8 +25,8 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
+#ifdef RTE_SCHED_AQM
+.wred_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v3 1/3] sched: add PIE based congestion management
  2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-06-21 18:17         ` Stephen Hemminger
  2021-06-22  7:39           ` Liguzinski, WojciechX
  0 siblings, 1 reply; 178+ messages in thread
From: Stephen Hemminger @ 2021-06-21 18:17 UTC (permalink / raw)
  To: Liguzinski, WojciechX
  Cc: dev, jasvinder.singh, cristian.dumitrescu, savinay.dharmappa,
	megha.ajmera

On Mon, 21 Jun 2021 08:35:04 +0100
"Liguzinski, WojciechX" <wojciechx.liguzinski@intel.com> wrote:

> +/**
> + * @brief Initialises run-time data
> + *
> + * @param pie [in,out] data pointer to PIE runtime data
> + *
> + * @return Operation status
> + * @retval 0 success
> + * @retval !0 error
> + */
> +int
> +rte_pie_rt_data_init(struct rte_pie *pie);

All the new code needs to be marked experimental.
Why return an error on the init() function, then you are going to
make application check the result and lead to lots more code.

Other places in DPDK use void for init functions.

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v3 1/3] sched: add PIE based congestion management
  2021-06-21 18:17         ` Stephen Hemminger
@ 2021-06-22  7:39           ` Liguzinski, WojciechX
  0 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-06-22  7:39 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Singh, Jasvinder, Dumitrescu, Cristian, Dharmappa, Savinay,
	Ajmera, Megha


> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org> 
> Sent: Monday, June 21, 2021 8:18 PM
> To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Cc: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Dharmappa, Savinay <savinay.dharmappa@intel.com>; Ajmera, Megha <megha.ajmera@intel.com>
> Subject: Re: [dpdk-dev] [RFC PATCH v3 1/3] sched: add PIE based congestion management
>
> On Mon, 21 Jun 2021 08:35:04 +0100
> "Liguzinski, WojciechX" <wojciechx.liguzinski@intel.com> wrote:
>
> > +/**
> > + * @brief Initialises run-time data
> > + *
> > + * @param pie [in,out] data pointer to PIE runtime data
> > + *
> > + * @return Operation status
> > + * @retval 0 success
> > + * @retval !0 error
> > + */
> > +int
> > +rte_pie_rt_data_init(struct rte_pie *pie);
>
> All the new code needs to be marked experimental.
> Why return an error on the init() function, then you are going to make application check the result and lead to lots more code.
>
> Other places in DPDK use void for init functions.

Thanks for comments.
I'll apply necessary updates to V4 of RFC patches.

BR,
Wojciech

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library
  2021-06-21  7:35     ` [dpdk-dev] [RFC PATCH v3 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
                         ` (2 preceding siblings ...)
  2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 3/3] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-07-05  8:04       ` Liguzinski, WojciechX
  2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v4 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
                           ` (4 more replies)
  3 siblings, 5 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-07-05  8:04 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency 
variation. Currently, it supports RED for active queue management (which is designed 
to control the queue length but it does not control latency directly and is now being 
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address 
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and 
adding a new set of data structures to the library, adding PIE related APIs. 
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (3):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support

 config/rte_config.h                      |   1 -
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 examples/ip_pipeline/tmgr.c              |   6 +-
 examples/qos_sched/app_thread.c          |   1 -
 examples/qos_sched/cfg_file.c            |  82 ++++-
 examples/qos_sched/init.c                |   7 +-
 examples/qos_sched/profile.cfg           | 196 +++++++----
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 229 +++++++++----
 lib/sched/rte_sched.h                    |  53 ++-
 lib/sched/version.map                    |   3 +
 13 files changed, 888 insertions(+), 181 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v4 1/3] sched: add PIE based congestion management
  2021-07-05  8:04       ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-07-05  8:04         ` Liguzinski, WojciechX
  2021-07-16 13:20           ` Dumitrescu, Cristian
  2021-07-16 15:11           ` Dumitrescu, Cristian
  2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v4 2/3] example/qos_sched: add pie support Liguzinski, WojciechX
                           ` (3 subsequent siblings)
  4 siblings, 2 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-07-05  8:04 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 229 +++++++++----
 lib/sched/rte_sched.h                    |  53 ++-
 lib/sched/version.map                    |   3 +
 7 files changed, 685 insertions(+), 91 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..5b6c4e6d4b 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->wred_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data (per queue)
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (cpu cycles; assigned from the
+					*   64-bit max_burst -- NOTE(review): possible truncation) */
+	uint64_t qdelay_old;           /**< Old queue delay (cpu cycles, not bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 always enqueue (never drops when the queue is empty)
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is a plain integer; the old assert compared it to NULL.
+	 * Guard the pointer that is actually dereferenced below instead.
+	 */
+	RTE_ASSERT(pie != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* If the queue has been idle for a while, turn off PIE and reset counters */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Periodically updates the drop probability (RFC 8033 section 4.2)
+ *
+ * Note: the original description ("make a decision to drop or enqueue")
+ * was wrong -- this routine only recalculates pie->drop_prob; the actual
+ * drop decision is made by _rte_pie_drop().
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Current queue delay estimate in cycles. Perform the division in
+	 * floating point: the previous (avg_dq_time / RTE_DQ_THRESHOLD)
+	 * integer division truncated to 0 whenever avg_dq_time < 16384.
+	 */
+	double current_qdelay = pie->qlen *
+		((double)pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	/* Auto-tuning: scale the adjustment down while drop_prob is small */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/* Exponentially decay drop prob when congestion goes away */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability to [0, 1] */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Decrement the burst allowance, saturating at 0. The previous code
+	 * subtracted into an unsigned temporary and then tested "> 0": an
+	 * underflow wrapped to a huge value, so the allowance never expired.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Uniform random number in [0, 1]. The previous integer division
+	 * rte_rand()/RTE_RAND_MAX evaluated to 0 almost always, which made
+	 * the comparison below drop every packet once drop_prob > 0.
+	 */
+	rand_value = (double)rte_rand() / (double)RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued.
+		 * NOTE(review): accu_prob is still advanced inside
+		 * _rte_pie_drop() during the burst-allowance window even though
+		 * the drop verdict is suppressed -- confirm this matches the
+		 * intended RFC 8033 burst behaviour.
+		 */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold (10% of the tail
+	 * drop threshold), turn on PIE and start a fresh measurement cycle.
+	 */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Enqueue decision entry point
+ *
+ * Updates run-time data and gives a verdict whether to enqueue or drop the
+ * packet, dispatching to the empty-queue or non-empty-queue handler.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* Empty queue: packets are always admitted */
+	if (qlen == 0)
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+
+	return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure; maintains pie->avg_dq_time, the averaged
+ * time needed to drain RTE_DQ_THRESHOLD bytes from the queue.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			/* Exponentially weighted moving average with weight
+			 * RTE_DQ_WEIGHT; the double result is truncated on
+			 * assignment back to the uint64_t avg_dq_time --
+			 * NOTE(review): sub-cycle precision loss, confirm
+			 * acceptable.
+			 */
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index cd87e688e4..afda39caf5 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_aqm_mode aqm;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_AQM
+
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t tc, color;
+
+	/* Initialise one WRED context per (traffic class, packet color) pair */
+	for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
+		for (color = 0; color < RTE_COLORS; color++) {
+			struct rte_red_params *wp = &params->wred_params[tc][color];
+
+			/* if min/max are both zero, then RED is disabled */
+			if ((wp->min_th | wp->max_th) == 0)
+				continue;
+
+			if (rte_red_config_init(&s->wred_config[tc][color],
+				wp->wq_log2,
+				wp->min_th,
+				wp->max_th,
+				wp->maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		/* The tail-drop threshold must fit within the queue size */
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			/* Free allocated memory like the other error path and
+			 * rte_sched_red_config() do; previously this path
+			 * leaked the subport allocations.
+			 */
+			rte_sched_free_memory(port, n_subports);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_PIE;
+	return 0;
+}
+
+static int
+rte_sched_aqm_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	/* Dispatch to the configured Active Queue Management scheme */
+	switch (params->aqm) {
+	case RTE_SCHED_AQM_WRED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_AQM_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1169,30 +1263,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					rte_sched_free_memory(port, n_subports);
-
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					return -EINVAL;
-				}
-			}
+#ifdef RTE_SCHED_AQM
+		status = rte_sched_aqm_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1714,29 +1789,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_AQM
+	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
 #endif
 }
 
@@ -1752,58 +1818,61 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_AQM
+	qe->stats.n_pkts_aqm_dropped += drops;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->wred_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	/* rte_pie_enqueue() takes (cfg, pie, qlen, pkt_len, time): the
+	 * previous call passed pkt_len and qlen in swapped order.
+	 */
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1811,14 +1880,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red *red = &qe->red;
+
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+/* Account a packet departure in the queue's PIE state */
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+	uint32_t qindex, uint32_t pkt_len, uint64_t time)
+{
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
 
-	rte_red_mark_queue_empty(red, port->time);
+	/* Update queue length.
+	 * NOTE(review): the caller passes pkt_len including frame_overhead
+	 * while rte_pie_enqueue() accounted pkt->pkt_len only, so qlen_bytes
+	 * can drift/underflow -- confirm both sides use the same length.
+	 */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1829,7 +1913,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1925,7 +2009,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2398,6 +2482,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2417,15 +2502,19 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_AQM
+	if (subport->aqm == RTE_SCHED_AQM_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..a5fe6266cd 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Active Queue Management */
+#ifdef RTE_SCHED_AQM
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Active Queue Management (AQM) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_aqm_mode {
+	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/** Active Queue Management mode */
+	enum rte_sched_aqm_mode aqm;
+
+	RTE_STD_C11
+	union {
+		/** WRED parameters */
+		struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_AQM
+	/** Number of packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_AQM
+	/** Packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped;
 #endif
 
 	/** Bytes successfully written */
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v4 2/3] example/qos_sched: add pie support
  2021-07-05  8:04       ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v4 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-07-05  8:04         ` Liguzinski, WojciechX
  2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v3 3/3] example/ip_pipeline: add PIE support Liguzinski, WojciechX
                           ` (2 subsequent siblings)
  4 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-07-05  8:04 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

This patch adds support for enabling either PIE or RED,
selected by parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |   7 +-
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 5 files changed, 200 insertions(+), 87 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..657763ca90 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/* Default to WRED so aqm_mode is never read uninitialized later on
+	 * when the profile has neither a [red] nor a [pie] section.
+	 */
+	enum rte_sched_aqm_mode aqm_mode = RTE_SCHED_AQM_WRED;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		aqm_mode = RTE_SCHED_AQM_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		aqm_mode = RTE_SCHED_AQM_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_AQM */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
+			subport_params[i].aqm = aqm_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].aqm == RTE_SCHED_AQM_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].wred_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].wred_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].wred_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].wred_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].pie_params[j].qdelay_ref =
+						pie_params[j].qdelay_ref;
+					subport_params[i].pie_params[j].dp_update_interval =
+						pie_params[j].dp_update_interval;
+					subport_params[i].pie_params[j].max_burst =
+						pie_params[j].max_burst;
+					subport_params[i].pie_params[j].tailq_th =
+						pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..96ba3b6616 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -212,8 +212,9 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
-	.red_params = {
+#ifdef RTE_SCHED_AQM
+	.aqm = RTE_SCHED_AQM_WRED,
+	.wred_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +280,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 	},
 };
 
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v3 3/3] example/ip_pipeline: add PIE support
  2021-07-05  8:04       ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v4 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v4 2/3] example/qos_sched: add pie support Liguzinski, WojciechX
@ 2021-07-05  8:04         ` Liguzinski, WojciechX
  2021-07-16 12:46         ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Dumitrescu, Cristian
  2021-09-07  7:33         ` [dpdk-dev] [RFC PATCH v5 0/5] " Liguzinski, WojciechX
  4 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-07-05  8:04 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: savinay.dharmappa, megha.ajmera

Add PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..73da2da870 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,8 +25,8 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
+#ifdef RTE_SCHED_AQM
+.wred_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library
  2021-07-05  8:04       ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
                           ` (2 preceding siblings ...)
  2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v3 3/3] example/ip_pipeline: add PIE support Liguzinski, WojciechX
@ 2021-07-16 12:46         ` Dumitrescu, Cristian
  2021-09-07  7:33         ` [dpdk-dev] [RFC PATCH v5 0/5] " Liguzinski, WojciechX
  4 siblings, 0 replies; 178+ messages in thread
From: Dumitrescu, Cristian @ 2021-07-16 12:46 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Singh, Jasvinder
  Cc: Dharmappa, Savinay, Ajmera, Megha

Hi Wojciech,

Thank you for doing this work!

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, July 5, 2021 9:04 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Dharmappa, Savinay <savinay.dharmappa@intel.com>; Ajmera, Megha
> <megha.ajmera@intel.com>
> Subject: [RFC PATCH v4 0/3] Add PIE support for HQoS library
> 
> DPDK sched library is equipped with mechanism that secures it from the
> bufferbloat problem
> which is a situation when excess buffers in the network cause high latency
> and latency
> variation. Currently, it supports RED for active queue management (which is
> designed
> to control the queue length but it does not control latency directly and is now
> being
> obsoleted). However, more advanced queue management is required to
> address this problem
> and provide desirable quality of service to users.

As already mentioned by other reviewers, I don't think RED/WRED is getting obsoleted. This entire paragraph is a bit fuzzy and not really adding much value IMO, I propose to remove it.

> 
> This solution (RFC) proposes usage of new algorithm called "PIE"
> (Proportional Integral
> controller Enhanced) that can effectively and directly control queuing latency
> to address
> the bufferbloat problem.

Please add a link to the public RFC for PIE in this cover letter.

> 
> The implementation of mentioned functionality includes modification of
> existing and
> adding a new set of data structures to the library, adding PIE related APIs.
> This affects structures in public API/ABI. That is why deprecation notice is
> going
> to be prepared and sent.

I think you are stating the obvious here, how about removing this paragraph as well?

> 
> Liguzinski, WojciechX (3):
>   sched: add PIE based congestion management
>   example/qos_sched: add PIE support
>   example/ip_pipeline: add PIE support
> 
>  config/rte_config.h                      |   1 -
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  examples/ip_pipeline/tmgr.c              |   6 +-
>  examples/qos_sched/app_thread.c          |   1 -
>  examples/qos_sched/cfg_file.c            |  82 ++++-
>  examples/qos_sched/init.c                |   7 +-
>  examples/qos_sched/profile.cfg           | 196 +++++++----
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 229 +++++++++----
>  lib/sched/rte_sched.h                    |  53 ++-
>  lib/sched/version.map                    |   3 +
>  13 files changed, 888 insertions(+), 181 deletions(-)
>  create mode 100644 lib/sched/rte_pie.c
>  create mode 100644 lib/sched/rte_pie.h
> 
> --
> 2.17.1

Regards,
Cristian

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v4 1/3] sched: add PIE based congestion management
  2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v4 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-07-16 13:20           ` Dumitrescu, Cristian
  2021-07-16 15:11           ` Dumitrescu, Cristian
  1 sibling, 0 replies; 178+ messages in thread
From: Dumitrescu, Cristian @ 2021-07-16 13:20 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Singh, Jasvinder
  Cc: Dharmappa, Savinay, Ajmera, Megha

Hi Wojciech,

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, July 5, 2021 9:04 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Dharmappa, Savinay <savinay.dharmappa@intel.com>; Ajmera, Megha
> <megha.ajmera@intel.com>
> Subject: [RFC PATCH v4 1/3] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 229 +++++++++----
>  lib/sched/rte_sched.h                    |  53 ++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 685 insertions(+), 91 deletions(-)
>  create mode 100644 lib/sched/rte_pie.c
>  create mode 100644 lib/sched/rte_pie.h
> 

<snip>

> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> index c1a772b70c..a5fe6266cd 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -61,9 +61,10 @@ extern "C" {
>  #include <rte_mbuf.h>
>  #include <rte_meter.h>
> 
> -/** Random Early Detection (RED) */
> -#ifdef RTE_SCHED_RED
> +/** Active Queue Management */
> +#ifdef RTE_SCHED_AQM

We typically use the term Congestion Management for this, it is already used in rte_tm.h for example. Please replace AQM with CMAN _everywhere_:

#ifdef RTE_SCHED_CMAN

>  #include "rte_red.h"
> +#include "rte_pie.h"
>  #endif
> 
>  /** Maximum number of queues per pipe.
> @@ -110,6 +111,28 @@ extern "C" {
>  #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
>  #endif
> 
> +/**
> + * Active Queue Management (AQM) mode
> + *
> + * This is used for controlling the admission of packets into a packet queue
> or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each packet
> color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
> + * drops a packet at the onset of the congestion and tries to control the
> + * latency around the target value. The congestion detection, however, is
> based
> + * on the queueing latency instead of the queue length like RED. For more
> + * information, refer RFC8033.
> + */
> +enum rte_sched_aqm_mode {
> +	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection
> (WRED) */
> +	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +
>  /*
>   * Pipe configuration parameters. The period and credits_per_period
>   * parameters are measured in bytes, with one byte meaning the time
> @@ -174,9 +197,17 @@ struct rte_sched_subport_params {
>  	/** Max allowed profiles in the pipe profile table */
>  	uint32_t n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -	/** RED parameters */
> -	struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_AQM
> +	/** Active Queue Management mode */
> +	enum rte_sched_aqm_mode aqm;
> +
> +	RTE_STD_C11
> +	union {
> +		/** WRED parameters */
> +		struct rte_red_params
> wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		/** PIE parameters */
> +		struct rte_pie_params
> pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
>  };
> 

We cannot have definitions of global variables in header files, we can only have extern declarations in header files and the definition in .c files.

Please create a global structure called rte_sched_cman_params that includes the both the WRED and the PIE parameters:

struct rte_sched_cman_params {
	enum rte_sched_cman cman_mode;

	union {
		/** WRED parameters */
		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];

		/** PIE parameters */
		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	};
};

Then please instantiate this structure in one of the .c files.

Please do not rename red to wred (as done in multiple places in this patch set).

> @@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
>  	/** Number of bytes dropped for each traffic class */
>  	uint64_t
> n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	/** Number of packets dropped by red */
> -	uint64_t
> n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +#ifdef RTE_SCHED_AQM
> +	/** Number of packets dropped by active queue management
> scheme */
> +	uint64_t
> n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  #endif
>  };
> 

Please remove the #ifdefs and consolidate these stats into a single generic structure:

struct rte_sched_subport_stats {
	...
	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	...
};

You can use the n_pkts_cman_dropped field for both WRED and PIE, depending on the cman_mode.

> @@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
>  	/** Packets dropped */
>  	uint64_t n_pkts_dropped;
> 
> -#ifdef RTE_SCHED_RED
> -	/** Packets dropped by RED */
> -	uint64_t n_pkts_red_dropped;
> +#ifdef RTE_SCHED_AQM
> +	/** Packets dropped by active queue management scheme */
> +	uint64_t n_pkts_aqm_dropped;
>  #endif
> 
>  	/** Bytes successfully written */

Please remove the #ifdefs and consolidate these stats into a single generic structure:

struct rte_sched_queue_stats {
	...
	uint64_t n_pkts_cman_dropped;
	...
};

You can use the n_pkts_cman_dropped field for both WRED and PIE, depending on the cman_mode.

Regards,
Cristian

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v4 1/3] sched: add PIE based congestion management
  2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v4 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-07-16 13:20           ` Dumitrescu, Cristian
@ 2021-07-16 15:11           ` Dumitrescu, Cristian
  1 sibling, 0 replies; 178+ messages in thread
From: Dumitrescu, Cristian @ 2021-07-16 15:11 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Singh, Jasvinder
  Cc: Dharmappa, Savinay, Ajmera, Megha

Hi Wojciech,

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, July 5, 2021 9:04 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Dharmappa, Savinay <savinay.dharmappa@intel.com>; Ajmera, Megha
> <megha.ajmera@intel.com>
> Subject: [RFC PATCH v4 1/3] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 229 +++++++++----
>  lib/sched/rte_sched.h                    |  53 ++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 685 insertions(+), 91 deletions(-)
>  create mode 100644 lib/sched/rte_pie.c
>  create mode 100644 lib/sched/rte_pie.h
> 
> diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c
> b/drivers/net/softnic/rte_eth_softnic_tm.c
> index 90baba15ce..5b6c4e6d4b 100644
> --- a/drivers/net/softnic/rte_eth_softnic_tm.c
> +++ b/drivers/net/softnic/rte_eth_softnic_tm.c
> @@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
>  	return 0;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
>  #define WRED_SUPPORTED						1
>  #else
>  #define WRED_SUPPORTED						0
> @@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev,
> uint32_t tc_id)
>  	return NULL;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static void
>  wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
> @@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev,
> uint32_t subport_id)
>  	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE;
> tc_id++)
>  		for (color = RTE_COLOR_GREEN; color < RTE_COLORS;
> color++) {
>  			struct rte_red_params *dst =
> -				&pp->red_params[tc_id][color];
> +				&pp->wred_params[tc_id][color];

Please do NOT rename red to wred in this patch set.

>  			struct tm_wred_profile *src_wp =
>  				tm_tc_wred_profile_get(dev, tc_id);
>  			struct rte_tm_red_params *src =

<snip>

> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
> index cd87e688e4..afda39caf5 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c
> @@ -89,8 +89,12 @@ struct rte_sched_queue {
> 
>  struct rte_sched_queue_extra {
>  	struct rte_sched_queue_stats stats;
> -#ifdef RTE_SCHED_RED
> -	struct rte_red red;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red red;
> +		struct rte_pie pie;
> +	};
>  #endif
>  };
> 
> @@ -183,8 +187,13 @@ struct rte_sched_subport {
>  	/* Pipe queues size */
>  	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +	enum rte_sched_aqm_mode aqm;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red_config
> wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		struct rte_pie_config
> pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif

Please use the proposed rte_sched_cman_params structure.


> 
>  	/* Scheduling loop detection */
> @@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port
> *port, uint32_t n_subports)
>  	rte_free(port);
>  }
> 
> +#ifdef RTE_SCHED_AQM
> +
> +static int
> +rte_sched_red_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +
> +		uint32_t j;
> +
> +		for (j = 0; j < RTE_COLORS; j++) {
> +			/* if min/max are both zero, then RED is disabled */
> +			if ((params->wred_params[i][j].min_th |
> +				 params->wred_params[i][j].max_th) == 0) {
> +				continue;
> +			}
> +
> +			if (rte_red_config_init(&s->wred_config[i][j],
> +				params->wred_params[i][j].wq_log2,
> +				params->wred_params[i][j].min_th,
> +				params->wred_params[i][j].max_th,
> +				params->wred_params[i][j].maxp_inv) != 0) {
> +				rte_sched_free_memory(port, n_subports);
> +
> +				RTE_LOG(NOTICE, SCHED,
> +				"%s: RED configuration init fails\n",
> __func__);
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	s->aqm = RTE_SCHED_AQM_WRED;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_pie_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE tailq threshold incorrect\n", __func__);
> +			return -EINVAL;
> +		}
> +
> +		if (rte_pie_config_init(&s->pie_config[i],
> +			params->pie_params[i].qdelay_ref,
> +			params->pie_params[i].dp_update_interval,
> +			params->pie_params[i].max_burst,
> +			params->pie_params[i].tailq_th) != 0) {
> +			rte_sched_free_memory(port, n_subports);
> +
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE configuration init fails\n", __func__);
> +			return -EINVAL;
> +			}
> +	}
> +	s->aqm = RTE_SCHED_AQM_PIE;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_aqm_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	if (params->aqm == RTE_SCHED_AQM_WRED)
> +		return rte_sched_red_config(port, s, params, n_subports);
> +
> +	else if (params->aqm == RTE_SCHED_AQM_PIE)
> +		return rte_sched_pie_config(port, s, params, n_subports);
> +
> +	return -EINVAL;
> +}
> +#endif
> +
>  int
>  rte_sched_subport_config(struct rte_sched_port *port,
>  	uint32_t subport_id,
> @@ -1169,30 +1263,11 @@ rte_sched_subport_config(struct
> rte_sched_port *port,
>  		s->n_pipe_profiles = params->n_pipe_profiles;
>  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -			uint32_t j;
> -
> -			for (j = 0; j < RTE_COLORS; j++) {
> -			/* if min/max are both zero, then RED is disabled */
> -				if ((params->red_params[i][j].min_th |
> -				     params->red_params[i][j].max_th) == 0) {
> -					continue;
> -				}
> -
> -				if (rte_red_config_init(&s->red_config[i][j],
> -				    params->red_params[i][j].wq_log2,
> -				    params->red_params[i][j].min_th,
> -				    params->red_params[i][j].max_th,
> -				    params->red_params[i][j].maxp_inv) != 0)
> {
> -					rte_sched_free_memory(port,
> n_subports);
> -
> -					RTE_LOG(NOTICE, SCHED,
> -					"%s: RED configuration init fails\n",
> -					__func__);
> -					return -EINVAL;
> -				}
> -			}
> +#ifdef RTE_SCHED_AQM
> +		status = rte_sched_aqm_config(port, s, params,
> n_subports);
> +		if (status) {
> +			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration
> fails\n", __func__);
> +			return status;
>  		}
>  #endif
> 
> @@ -1714,29 +1789,20 @@ rte_sched_port_update_subport_stats(struct
> rte_sched_port *port,
>  	subport->stats.n_bytes_tc[tc_index] += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
> -static inline void
> -rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
> -	struct rte_sched_subport *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)

Please rename drops with n_pkts_cman_dropped.

>  {
>  	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
>  	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
> -#ifdef RTE_SCHED_RED
> -	subport->stats.n_pkts_red_dropped[tc_index] += red;
> +#ifdef RTE_SCHED_AQM
> +	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
>  #endif
>  }
> 

Due to the recommended generic field n_pkts_cman_dropped of the rte_sched_subport_stats structure, you don't need the macro here anymore :)

> @@ -1752,58 +1818,61 @@ rte_sched_port_update_queue_stats(struct
> rte_sched_subport *subport,
>  	qe->stats.n_bytes += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)

Please rename drops with n_pkts_cman_dropped.

>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	qe->stats.n_pkts_dropped += 1;
>  	qe->stats.n_bytes_dropped += pkt_len;
> -#ifdef RTE_SCHED_RED
> -	qe->stats.n_pkts_red_dropped += red;
> +#ifdef RTE_SCHED_AQM
> +	qe->stats.n_pkts_aqm_dropped += drops;
>  #endif
>  }
> 
>  #endif /* RTE_SCHED_COLLECT_STATS */
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static inline int
> -rte_sched_port_red_drop(struct rte_sched_port *port,
> +rte_sched_port_aqm_drop(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport,
>  	struct rte_mbuf *pkt,
>  	uint32_t qindex,
>  	uint16_t qlen)
>  {
>  	struct rte_sched_queue_extra *qe;
> -	struct rte_red_config *red_cfg;
> -	struct rte_red *red;
>  	uint32_t tc_index;
> -	enum rte_color color;
> 
>  	tc_index = rte_sched_port_pipe_tc(port, qindex);
> -	color = rte_sched_port_pkt_read_color(pkt);
> -	red_cfg = &subport->red_config[tc_index][color];
> +	qe = subport->queue_extra + qindex;
> 
> -	if ((red_cfg->min_th | red_cfg->max_th) == 0)
> -		return 0;
> +	/* WRED */
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red_config *red_cfg;
> +		struct rte_red *red;
> +		enum rte_color color;
> 
> -	qe = subport->queue_extra + qindex;
> -	red = &qe->red;
> +		color = rte_sched_port_pkt_read_color(pkt);
> +		red_cfg = &subport->wred_config[tc_index][color];
> +
> +		if ((red_cfg->min_th | red_cfg->max_th) == 0)
> +			return 0;
> 
> -	return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +		red = &qe->red;
> +
> +		return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +	}
> +
> +	/* PIE */
> +	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
> +	struct rte_pie *pie = &qe->pie;
> +

You don't want to declare new variables in the middle of the function, but you do want to reduce their scope, so maybe use the else for this (although not needed, since you call return on the if branch)?

> +	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port-
> >time_cpu_cycles);
>  }
> 
>  static inline void
> @@ -1811,14 +1880,29 @@
> rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport, uint32_t qindex)
>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> -	struct rte_red *red = &qe->red;
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red *red = &qe->red;
> +
> +		rte_red_mark_queue_empty(red, port->time);
> +	}
> +}
> +

Please rename this function to rte_sched_port_red_set_queue_empty_timestamp() to reflect that it is RED-specific.

> +static inline void
> +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
> +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> +	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> +	struct rte_pie *pie = &qe->pie;
> 
> -	rte_red_mark_queue_empty(red, port->time);
> +	/* Update queue length */
> +	pie->qlen -= 1;
> +	pie->qlen_bytes -= pkt_len;
> +
> +	rte_pie_dequeue(pie, pkt_len, time);
>  }
> 

Please do the processing in this function only if CMAN is PIE (add an if statement), and remove the if statement from where it gets called.

Also you need to define this function as do-nothing on the #else branch, right?

>  #else
> 
> -static inline int rte_sched_port_red_drop(struct rte_sched_port *port
> __rte_unused,
> +static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port
> __rte_unused,
>  	struct rte_sched_subport *subport __rte_unused,
>  	struct rte_mbuf *pkt __rte_unused,
>  	uint32_t qindex __rte_unused,
> @@ -1829,7 +1913,7 @@ static inline int rte_sched_port_red_drop(struct
> rte_sched_port *port __rte_unus
> 
>  #define rte_sched_port_set_queue_empty_timestamp(port, subport,
> qindex)
> 
> -#endif /* RTE_SCHED_RED */
> +#endif /* RTE_SCHED_AQM */
> 
>  #ifdef RTE_SCHED_DEBUG
> 
> @@ -1925,7 +2009,7 @@ rte_sched_port_enqueue_qwa(struct
> rte_sched_port *port,
>  	qlen = q->qw - q->qr;
> 
>  	/* Drop the packet (and update drop stats) when queue is full */
> -	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex,
> qlen) ||
> +	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex,
> qlen) ||
>  		     (qlen >= qsize))) {
>  		rte_pktmbuf_free(pkt);
>  #ifdef RTE_SCHED_COLLECT_STATS
> @@ -2398,6 +2482,7 @@ grinder_schedule(struct rte_sched_port *port,
>  {
>  	struct rte_sched_grinder *grinder = subport->grinder + pos;
>  	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
> +	uint32_t qindex = grinder->qindex[grinder->qpos];
>  	struct rte_mbuf *pkt = grinder->pkt;
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
>  	uint32_t be_tc_active;
> @@ -2417,15 +2502,19 @@ grinder_schedule(struct rte_sched_port *port,
>  		(pkt_len * grinder->wrr_cost[grinder->qpos]) &
> be_tc_active;
> 
>  	if (queue->qr == queue->qw) {
> -		uint32_t qindex = grinder->qindex[grinder->qpos];
> -
>  		rte_bitmap_clear(subport->bmp, qindex);
>  		grinder->qmask &= ~(1 << grinder->qpos);
>  		if (be_tc_active)
>  			grinder->wrr_mask[grinder->qpos] = 0;
> +
>  		rte_sched_port_set_queue_empty_timestamp(port,
> subport, qindex);
>  	}
> 
> +#ifdef RTE_SCHED_AQM
> +	if (subport->aqm == RTE_SCHED_AQM_PIE)
> +		rte_sched_port_pie_dequeue(subport, qindex, pkt_len,
> port->time_cpu_cycles);

As stated before, move the if statement within the function, and remove the macro from here by defining this same function as do-nothing on the #else branch above.

> +#endif
> +
>  	/* Reset pipe loop detection */
>  	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
>  	grinder->productive = 1;

Regards,
Cristian

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v5 0/5] Add PIE support for HQoS library
  2021-07-05  8:04       ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
                           ` (3 preceding siblings ...)
  2021-07-16 12:46         ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Dumitrescu, Cristian
@ 2021-09-07  7:33         ` Liguzinski, WojciechX
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                             ` (5 more replies)
  4 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07  7:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management (which is designed
to control the queue length but it does not control latency directly and is now being
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and
adding a new set of data structures to the library, adding PIE related APIs.
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (3):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/autotest_data.py                    |   18 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1076 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |    6 +-
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |   82 +-
 examples/qos_sched/init.c                    |    7 +-
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  228 ++--
 lib/sched/rte_sched.h                        |   53 +-
 lib/sched/version.map                        |    3 +
 19 files changed, 2061 insertions(+), 190 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management
  2021-09-07  7:33         ` [dpdk-dev] [RFC PATCH v5 0/5] " Liguzinski, WojciechX
@ 2021-09-07  7:33           ` Liguzinski, WojciechX
  2021-09-07 19:14             ` Stephen Hemminger
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 2/5] example/qos_sched: add pie support Liguzinski, WojciechX
                             ` (4 subsequent siblings)
  5 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07  7:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE-based congestion management as specified in RFC 8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 228 +++++++++----
 lib/sched/rte_sched.h                    |  53 ++-
 lib/sched/version.map                    |   3 +
 7 files changed, 685 insertions(+), 90 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..5b6c4e6d4b 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->wred_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	/* Allocating a replacement object here is useless: the pointer is
+	 * passed by value, so the caller never sees the new allocation and
+	 * it leaks. Worse, if rte_malloc() failed, the code fell through and
+	 * dereferenced the NULL pointer. Treat NULL as a caller error.
+	 */
+	if (pie == NULL) {
+		RTE_LOG(ERR, SCHED, "%s: Invalid addr for pie\n", __func__);
+		return;
+	}
+
+	pie->active = 0;                /* PIE inactive until occupancy builds up */
+	pie->in_measurement = 0;        /* no dequeue-rate measurement running */
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->qlen_bytes = 0;            /* was missing: left stale otherwise */
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+/* Validate the user-supplied PIE parameters and convert the millisecond
+ * values into CPU cycles. Returns 0 on success, -EINVAL on any invalid
+ * argument (NULL config pointer or a zero parameter).
+ */
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	/* Return -EINVAL here too, for consistency with the checks below
+	 * (the original returned a bare -1 for this one case only).
+	 */
+	if (pie_cfg == NULL)
+		return -EINVAL;
+
+	/* The parameters are unsigned, so the original "<= 0" tests could
+	 * only ever reject 0; make the zero checks explicit.
+	 */
+	if (qdelay_ref == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Convert milliseconds to CPU cycles (fits in 64 bits: tsc_hz is
+	 * ~2^32 and the parameters are 16-bit).
+	 */
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ * All time values are in milliseconds; rte_pie_config_init() converts them
+ * to CPU cycles when filling the run-time struct rte_pie_config.
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ * Internal representation produced by rte_pie_config_init() from
+ * struct rte_pie_params (milliseconds converted to CPU cycles).
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	/* NOTE(review): burst_allowance is assigned from and decremented by
+	 * CPU-cycle quantities (max_burst, dp_update_interval), so "(bytes)"
+	 * looks wrong, and the 32-bit width may truncate 64-bit cycle counts
+	 * — confirm intended unit and width.
+	 */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	/* NOTE(review): qdelay_old stores current_qdelay, which is derived
+	 * from avg_dq_time (cycles), not a byte count — confirm unit.
+	 */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 always: the packet is enqueued
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* The original asserted "pkt_len != NULL", but pkt_len is a plain
+	 * uint32_t, not a pointer; assert the pointer arguments instead.
+	 */
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while (occupancy below 10% of the
+	 * tail-drop threshold), turn off PIE and reset the measurement state;
+	 * rte_pie_enqueue_nonempty() re-activates it on congestion.
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Periodically update the PIE drop probability and burst allowance
+ *
+ * Implements the drop-probability calculation of RFC 8033 (section 4.2).
+ * Called from rte_pie_enqueue_nonempty() once per dp_update_interval.
+ * (The original brief wrongly described the per-packet drop decision,
+ * which is made by _rte_pie_drop().)
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	/* PI controller: react to both the distance from the latency target
+	 * and the delay trend since the previous update.
+	 */
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	/* Auto-tune the adjustment: damp it while drop_prob is small
+	 * (RFC 8033, section 5.5).
+	 */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	/* Cap the per-update increment once the probability is large */
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* decay factor per RFC 8033 */
+
+	/* Bound drop probability to [0, 1] */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Decrement the burst allowance, saturating at zero. The fields are
+	 * unsigned, so the original "burst_allowance - dp_update_interval"
+	 * wrapped around on underflow, and its "(x > 0)" check was then
+	 * always true for the huge wrapped value.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	/* Safeguards against too frequent / too sparse drops (RFC 8033 §5.1) */
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Scale the 64-bit random value into [0, 1] in floating point before
+	 * comparing with drop_prob. The original integer division
+	 * rte_rand()/RTE_RAND_MAX truncated to 0 almost always, turning the
+	 * comparison into the degenerate "0 < drop_prob".
+	 */
+	double rand_value = (double)rte_rand() / (double)RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued;
+		 * drops are suppressed while burst allowance remains.
+		 */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold (10% of the
+	 * tail-drop threshold), turn on PIE and reset its state.
+	 * NOTE(review): burst_allowance is uint32_t but max_burst is a
+	 * 64-bit cycle count — this assignment may truncate; confirm.
+	 */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters
+	 * (mutually exclusive with the activation branch above: qlen cannot be
+	 * both >= and < the same threshold in one call)
+	 */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * NOTE(review): qlen and pkt_len are adjacent integer parameters of similar
+ * type — call sites must pass qlen before pkt_len; verify every caller
+ * (swapping them compiles silently).
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet based on drop probability criteria
+ *         (see rte_pie_enqueue_nonempty() for the distinct values 1 and 2)
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* Dispatch on current occupancy: the empty-queue path never drops */
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * Implements departure-rate estimation: departures are timed over bursts of
+ * at least RTE_DQ_THRESHOLD bytes and folded into an exponentially weighted
+ * moving average of the measurement-cycle duration (avg_dq_time).
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			/* EWMA with smoothing factor RTE_DQ_WEIGHT; the very
+			 * first sample seeds the average directly.
+			 */
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..320435ed91 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_aqm_mode aqm;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_AQM
+
+/* Configure WRED for every (traffic class, color) pair of a subport from
+ * user-supplied parameters. A zero min_th/max_th pair leaves RED disabled
+ * for that pair. On a bad parameter set, the port memory for all subports
+ * is released and -EINVAL is returned; 0 on success.
+ */
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->wred_params[i][j].min_th |
+				 params->wred_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->wred_config[i][j],
+				params->wred_params[i][j].wq_log2,
+				params->wred_params[i][j].min_th,
+				params->wred_params[i][j].max_th,
+				params->wred_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_WRED;
+	return 0;
+}
+
+/* Configure PIE for every traffic class of a subport from user-supplied
+ * parameters. On any failure the port memory for all subports is released
+ * and -EINVAL is returned; 0 on success.
+ */
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			/* Free the port memory on this error path too, for
+			 * consistency with the failure path below and with
+			 * rte_sched_red_config(); the original leaked here.
+			 */
+			rte_sched_free_memory(port, n_subports);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_PIE;
+	return 0;
+}
+
+/* Dispatch subport AQM configuration to the WRED or PIE initializer
+ * according to the requested mode; unknown modes yield -EINVAL.
+ */
+static int
+rte_sched_aqm_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	switch (params->aqm) {
+	case RTE_SCHED_AQM_WRED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_AQM_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_AQM
+		status = rte_sched_aqm_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_AQM
+	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
 #endif
 }
 
@@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_AQM
+	qe->stats.n_pkts_aqm_dropped += drops;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->wred_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1815,14 +1885,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red *red = &qe->red;
+
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+/* Per-packet PIE dequeue hook: update the occupancy tracked in the PIE
+ * run-time data and feed the departure into the dequeue-rate estimator.
+ * (Brace moved to its own line and parameters indented to match the
+ * file's function-definition style.)
+ */
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+	uint32_t qindex, uint32_t pkt_len, uint64_t time)
+{
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
+
+	/* Update queue length, saturating at zero: the counters are unsigned,
+	 * so an unmatched dequeue must not be allowed to wrap them around.
+	 */
+	if (pie->qlen > 0)
+		pie->qlen -= 1;
+	if (pie->qlen_bytes >= pkt_len)
+		pie->qlen_bytes -= pkt_len;
+	else
+		pie->qlen_bytes = 0;
+
+	rte_pie_dequeue(pie, pkt_len, time);
+}
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_AQM
+	if (subport->aqm == RTE_SCHED_AQM_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..a5fe6266cd 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Active Queue Management */
+#ifdef RTE_SCHED_AQM
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Active Queue Management (AQM) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_aqm_mode {
+	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/** Active Queue Management mode */
+	enum rte_sched_aqm_mode aqm;
+
+	RTE_STD_C11
+	union {
+		/** WRED parameters */
+		struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_AQM
+	/** Number of packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_AQM
+	/** Packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped;
 #endif
 
 	/** Bytes successfully written */
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v5 2/5] example/qos_sched: add pie support
  2021-09-07  7:33         ` [dpdk-dev] [RFC PATCH v5 0/5] " Liguzinski, WojciechX
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-09-07  7:33           ` Liguzinski, WojciechX
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 3/5] example/ip_pipeline: add PIE support Liguzinski, WojciechX
                             ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07  7:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling either PIE or RED congestion
management, selected by parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |   7 +-
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 5 files changed, 200 insertions(+), 87 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..657763ca90 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	enum rte_sched_aqm_mode aqm_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		aqm_mode = RTE_SCHED_AQM_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		aqm_mode = RTE_SCHED_AQM_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_AQM */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
+			subport_params[i].aqm = aqm_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].aqm == RTE_SCHED_AQM_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].wred_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].wred_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].wred_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].wred_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].pie_params[j].qdelay_ref =
+						pie_params[j].qdelay_ref;
+					subport_params[i].pie_params[j].dp_update_interval =
+						pie_params[j].dp_update_interval;
+					subport_params[i].pie_params[j].max_burst =
+						pie_params[j].max_burst;
+					subport_params[i].pie_params[j].tailq_th =
+						pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..96ba3b6616 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -212,8 +212,9 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
-	.red_params = {
+#ifdef RTE_SCHED_AQM
+	.aqm = RTE_SCHED_AQM_WRED,
+	.wred_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +280,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 	},
 };
 
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v5 3/5] example/ip_pipeline: add PIE support
  2021-09-07  7:33         ` [dpdk-dev] [RFC PATCH v5 0/5] " Liguzinski, WojciechX
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 2/5] example/qos_sched: add pie support Liguzinski, WojciechX
@ 2021-09-07  7:33           ` Liguzinski, WojciechX
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                             ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07  7:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..73da2da870 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,8 +25,8 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
+#ifdef RTE_SCHED_AQM
+.wred_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v5 4/5] doc/guides/prog_guide: added PIE
  2021-09-07  7:33         ` [dpdk-dev] [RFC PATCH v5 0/5] " Liguzinski, WojciechX
                             ` (2 preceding siblings ...)
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 3/5] example/ip_pipeline: add PIE support Liguzinski, WojciechX
@ 2021-09-07  7:33           ` Liguzinski, WojciechX
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-09-07 14:11           ` [dpdk-dev] [RFC PATCH v6 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07  7:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady-state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+These parameters are specified in the format expected by the dropper module API.
+They could be made self-calculated for fine tuning within the applications.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and desired latency and on whether the queuing latency is
+currently trending up or down. Queuing latency can be obtained by direct measurement
+or estimated from the queue length and dequeue rate. The random drop is triggered
+upon a packet's arrival, before it is enqueued into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v5 5/5] app/test: add tests for PIE
  2021-09-07  7:33         ` [dpdk-dev] [RFC PATCH v5 0/5] " Liguzinski, WojciechX
                             ` (3 preceding siblings ...)
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-09-07  7:33           ` Liguzinski, WojciechX
  2021-09-07 14:11           ` [dpdk-dev] [RFC PATCH v6 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07  7:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/autotest_data.py |   18 +
 app/test/meson.build      |    4 +
 app/test/test_pie.c       | 1076 +++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c       |    6 +-
 lib/sched/rte_pie.h       |    9 +-
 lib/sched/rte_sched.c     |    2 +-
 6 files changed, 1111 insertions(+), 4 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 302d6374c1..1d4418b6a3 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -279,6 +279,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie autotest",
+        "Command": "pie_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "PMD ring autotest",
         "Command": "ring_pmd_autotest",
@@ -525,6 +531,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie all",
+        "Command": "pie_all",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
 	"Name":    "Fbarray autotest",
 	"Command": "fbarray_autotest",
@@ -731,6 +743,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie_perf",
+        "Command": "pie_perf",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Lpm6 perf autotest",
         "Command": "lpm6_perf_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index a7611686ad..f224b0c17e 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -111,6 +111,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -241,6 +242,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -292,6 +294,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -305,6 +308,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..ef4004b559
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1076 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+															(milliseconds) */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp
+																up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets
+																not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing
+																	RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Master test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display
+													for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure
+													for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used
+													for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
/* Port speed: 10 Gbit/s, expressed in bytes per second. */
static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
/* Reciprocal of CPU cycles per byte; written once by init_port_ts(). */
static double inv_cycles_per_byte = 0;

/* Pre-compute the cycles -> byte-time conversion used by get_port_ts(). */
static void init_port_ts(uint64_t cpu_clock)
{
	const double cycles_per_byte =
		(double)(cpu_clock) / (double)(port_speed_bytes);

	inv_cycles_per_byte = 1.0 / cycles_per_byte;
}
+
+/* Convert the current TSC value into a byte-time on a 10 Gbit/s port.
+ * Relies on init_port_ts() having set inv_cycles_per_byte beforehand.
+ */
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+/* Open a measurement window; pairs with rdtsc_prof_end(). */
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc>0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+    return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie,
+                    uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+    pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+    return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
/*
 * Fraction of operations dropped: dropped / (enqueued + dropped).
 *
 * Returns 0.0 when no operations were recorded at all, avoiding the
 * 0/0 division (NaN) the unguarded expression would produce.
 */
static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
{
	const double total = (double)enqueued + (double)dropped;

	if (total == 0.0)
		return 0.0;

	return (double)dropped / total;
}
+
/*
 * Check whether the measured drop rate matches the expected drop
 * probability within a relative tolerance.
 *
 * @param diff [out] relative difference, in percent
 * @param drop_rate measured drop rate (fraction)
 * @param drop_prob expected drop probability (fraction)
 * @param tolerance allowed relative difference, in percent
 * @return 1 when within tolerance, 0 otherwise
 */
static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
			   double tolerance)
{
	double abs_diff = 0.0;
	int ret = 1;

	abs_diff = fabs(drop_rate - drop_prob);
	/*
	 * Compare against 0.0 directly: the previous (int)abs_diff == 0
	 * truncation treated any difference below 1.0 as "equal", which
	 * disabled the tolerance check for probabilities in [0, 1].
	 */
	if (abs_diff == 0.0) {
		*diff = 0.0;
	} else {
		*diff = (abs_diff / drop_prob) * 100.0;
		if (*diff > tolerance)
			ret = 0;
	}
	return ret;
}
+
+/**
+ * Initialise every PIE configuration under test and zero the counters
+ * shared between runs.
+ *
+ * @return PASS on success, FAIL when any rte_pie_config_init() fails.
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	/* derive the cycles -> byte-time conversion used by get_port_ts() */
+	init_port_ts( tcfg->tvar->clk_freq );
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	/* reset the counters shared between test iterations */
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
/*
 * Keep enqueuing until the actual queue size reaches the target level.
 *
 * @return 0 once an enqueue is accepted, -1 when every attempt was dropped.
 */
static int
increase_qsize(struct rte_pie_config *pie_cfg,
	       struct rte_pie *pie,
	       uint32_t *qlen,
	       uint32_t pkt_len,
	       uint32_t attempts)
{
	uint32_t i;

	for (i = 0; i < attempts; i++) {
		/* enqueue; stop as soon as the packet is accepted */
		if (rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len,
				    get_port_ts()) == 0)
			return 0;
	}

	/* no success within the allowed number of attempts */
	return -1;
}
+
/*
 * Functional-test helper: enqueue num_ops packets, counting how many
 * were accepted and how many were dropped.
 */
static void enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
				 struct rte_pie *pie,
				 uint32_t *qlen,
				 uint32_t num_ops,
				 uint32_t *enqueued,
				 uint32_t *dropped)
{
	uint32_t i;

	for (i = 0; i < num_ops; i++) {
		int ret = rte_pie_enqueue(pie_cfg, pie, *qlen,
					  sizeof(uint32_t), get_port_ts());

		if (ret == 0)
			(*enqueued)++;
		else
			(*dropped)++;
	}
}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] ={0};
+static uint32_t  ft_dropped[] ={0};
+static uint32_t  ft_enqueued[] ={0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob !=0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate !=0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+		       label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+		       drop_prob * 100.0, drop_rate * 100.0, diff,
+	               (double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {10, 20, 50, 150, 300, 600,
+													900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+/*
+ * Functional test 2: for each of the PIE configurations, ramp the queue
+ * to the configured level, enqueue num_ops packets and compare the
+ * measured drop rate against the reported drop probability.
+ */
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		/* reset run-time data and counters for this configuration */
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		/* NOTE(review): rte_pie_get_avg_dq_time() returns double;
+		 * storing it into a uint32_t truncates — confirm intended.
+		 */
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+	               (double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob !=0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate !=0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+		       label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+		       drop_prob * 100.0, drop_rate * 100.0, diff,
+	               (double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
/*
 * Performance helper: push num_ops packets through PIE, profiling each
 * call and updating the enqueued/dropped/dequeued counters. When
 * pie_out is NULL only the enqueue half runs (dequeued is not touched).
 */
static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
				 struct rte_pie *pie_in,
				 struct rte_pie *pie_out,
				 uint32_t *qlen,
				 uint32_t num_ops,
				 uint32_t *enqueued,
				 uint32_t *dropped,
				 uint32_t *dequeued,
				 struct rdtsc_prof *prof)
{
	uint32_t i;

	if (pie_cfg == NULL) {
		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
		return;
	}

	if (pie_in == NULL) {
		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
		return;
	}

	for (i = 0; i < num_ops; i++) {
		uint64_t ts;
		int ret;

		/* profiled enqueue */
		ts = get_port_ts();
		rdtsc_prof_start(prof);
		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
				      1000*sizeof(uint32_t), ts);
		rdtsc_prof_end(prof);

		if (ret == 0)
			(*enqueued)++;
		else
			(*dropped)++;

		/* profiled dequeue, only when an output queue is given */
		if (pie_out != NULL) {
			ts = get_port_ts();
			rdtsc_prof_start(prof);
			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
			rdtsc_prof_end(prof);

			(*dequeued)++;
		}
	}
}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test 1: measure enqueue-only performance.
+ *
+ * Initialises one PIE configuration, resets the counters, drives
+ * num_ops enqueues through enqueue_dequeue_perf() with the dequeue
+ * half disabled, then prints totals and RDTSC timing statistics.
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* NOTE(review): perf1_tvar leaves .dequeued unset (NULL); this is
+	 * safe only because pie_out is NULL here, so the dequeue counter
+	 * is never dereferenced — confirm if the test is ever extended.
+	 */
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test 2: measure combined enqueue and dequeue performance.
+ *
+ * Same flow as perf_test() but an output queue (pdata_out) is supplied,
+ * so each iteration performs a profiled enqueue and a profiled dequeue.
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	/* NOTE(review): total counts enqueued+dropped, yet the first
+	 * percentage below is dequeued/total — confirm this mix of
+	 * counters is the intended report.
+	 */
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+	    	*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+								 uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+								"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+								"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+								"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+	return;
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+    return 0;
+}
+
/* Print a one-line summary; the fail count appears only when non-zero. */
static void
show_stats(const uint32_t num_tests, const uint32_t num_pass)
{
	const uint32_t num_fail = num_tests - num_pass;

	if (num_fail == 0)
		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
	else
		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
		       num_fail);
}
+
/* Map pass/total counts onto an exit status: 0 = all passed, 1 = failures. */
static int
tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
{
	if (num_pass == num_tests)
		return 0;
	return 1;
}
+
+/* Entry point for the fast "pie_autotest" command: invalid-parameter
+ * checks followed by the quick functional tests.
+ */
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0) {
+		return -1;
+	}
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* Entry point for the "pie_perf" command: performance tests only. */
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* Entry point for the "pie_all" command: invalid-parameter checks plus
+ * every functional and performance test.
+ */
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..d9cf61e04c 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index 320435ed91..480b6e531d 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -1877,7 +1877,7 @@ rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
 	struct rte_pie *pie = &qe->pie;
 
-	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v6 0/5] Add PIE support for HQoS library
  2021-09-07  7:33         ` [dpdk-dev] [RFC PATCH v5 0/5] " Liguzinski, WojciechX
                             ` (4 preceding siblings ...)
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-09-07 14:11           ` Liguzinski, WojciechX
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                               ` (5 more replies)
  5 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07 14:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

The DPDK sched library is equipped with a mechanism that secures it from the bufferbloat problem,
which is a situation when excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management (RED is designed
to control the queue length, but it does not control latency directly and is now being
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes the use of a new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and 
adding a new set of data structures to the library, adding PIE related APIs. 
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/autotest_data.py                    |   18 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |    6 +-
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |   82 +-
 examples/qos_sched/init.c                    |    7 +-
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  228 ++--
 lib/sched/rte_sched.h                        |   53 +-
 lib/sched/version.map                        |    3 +
 19 files changed, 2050 insertions(+), 190 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v6 1/5] sched: add PIE based congestion management
  2021-09-07 14:11           ` [dpdk-dev] [RFC PATCH v6 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-09-07 14:11             ` Liguzinski, WojciechX
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                               ` (4 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07 14:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 228 +++++++++----
 lib/sched/rte_sched.h                    |  53 ++-
 lib/sched/version.map                    |   3 +
 7 files changed, 685 insertions(+), 90 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..5b6c4e6d4b 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->wred_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probility criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..320435ed91 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_aqm_mode aqm;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_AQM
+
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->wred_params[i][j].min_th |
+				 params->wred_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->wred_config[i][j],
+				params->wred_params[i][j].wq_log2,
+				params->wred_params[i][j].min_th,
+				params->wred_params[i][j].max_th,
+				params->wred_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->aqm = RTE_SCHED_AQM_PIE;
+	return 0;
+}
+
+static int
+rte_sched_aqm_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->aqm == RTE_SCHED_AQM_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->aqm == RTE_SCHED_AQM_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_AQM
+		status = rte_sched_aqm_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_AQM
+	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
 #endif
 }
 
@@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_AQM
+	qe->stats.n_pkts_aqm_dropped += drops;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->wred_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1815,14 +1885,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red *red = &qe->red;
+
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
 
-	rte_red_mark_queue_empty(red, port->time);
+	/* Update queue length */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_AQM
+	if (subport->aqm == RTE_SCHED_AQM_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..a5fe6266cd 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Active Queue Management */
+#ifdef RTE_SCHED_AQM
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Active Queue Management (AQM) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_aqm_mode {
+	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/** Active Queue Management mode */
+	enum rte_sched_aqm_mode aqm;
+
+	RTE_STD_C11
+	union {
+		/** WRED parameters */
+		struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_AQM
+	/** Number of packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_AQM
+	/** Packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped;
 #endif
 
 	/** Bytes successfully written */
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v6 2/5] example/qos_sched: add PIE support
  2021-09-07 14:11           ` [dpdk-dev] [RFC PATCH v6 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-09-07 14:11             ` Liguzinski, WojciechX
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                               ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07 14:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling PIE or RED by
parsing the config file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |   7 +-
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 5 files changed, 200 insertions(+), 87 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..657763ca90 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	enum rte_sched_aqm_mode aqm_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		aqm_mode = RTE_SCHED_AQM_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		aqm_mode = RTE_SCHED_AQM_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_AQM */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
+			subport_params[i].aqm = aqm_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].aqm == RTE_SCHED_AQM_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].wred_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].wred_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].wred_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].wred_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].pie_params[j].qdelay_ref =
+						pie_params[j].qdelay_ref;
+					subport_params[i].pie_params[j].dp_update_interval =
+						pie_params[j].dp_update_interval;
+					subport_params[i].pie_params[j].max_burst =
+						pie_params[j].max_burst;
+					subport_params[i].pie_params[j].tailq_th =
+						pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..96ba3b6616 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -212,8 +212,9 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
-	.red_params = {
+#ifdef RTE_SCHED_AQM
+	.aqm = RTE_SCHED_AQM_WRED,
+	.wred_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +280,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 	},
 };
 
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v6 3/5] example/ip_pipeline: add PIE support
  2021-09-07 14:11           ` [dpdk-dev] [RFC PATCH v6 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-09-07 14:11             ` Liguzinski, WojciechX
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                               ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07 14:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..73da2da870 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,8 +25,8 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
+#ifdef RTE_SCHED_AQM
+.wred_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v6 4/5] doc/guides/prog_guide: added PIE
  2021-09-07 14:11           ` [dpdk-dev] [RFC PATCH v6 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                               ` (2 preceding siblings ...)
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-09-07 14:11             ` Liguzinski, WojciechX
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-09-22  7:46             ` [dpdk-dev] [RFC PATCH v7 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07 14:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on latency samples.
+It considers not only the current latency sample but also whether the latency is trending
+up or down. This is the classical Proportional Integral (PI) controller method, which is
+known for eliminating steady-state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+The format of these parameters is as specified to the dropper module API.
+They could be made self-calculated for fine tuning within the apps.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED) and Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. Calculated drop probability is updated periodically,
+based on latency measured and desired and whether the queuing latency is currently
+trending up or down. Queuing latency can be obtained using direct measurement or
+on estimations calculated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v6 5/5] app/test: add tests for PIE
  2021-09-07 14:11           ` [dpdk-dev] [RFC PATCH v6 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                               ` (3 preceding siblings ...)
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-09-07 14:11             ` Liguzinski, WojciechX
  2021-09-22  7:46             ` [dpdk-dev] [RFC PATCH v7 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-07 14:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/autotest_data.py |   18 +
 app/test/meson.build      |    4 +
 app/test/test_pie.c       | 1065 +++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c       |    6 +-
 lib/sched/rte_pie.h       |    9 +-
 lib/sched/rte_sched.c     |    2 +-
 6 files changed, 1100 insertions(+), 4 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 302d6374c1..1d4418b6a3 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -279,6 +279,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie autotest",
+        "Command": "pie_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "PMD ring autotest",
         "Command": "ring_pmd_autotest",
@@ -525,6 +531,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie all",
+        "Command": "red_all",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
 	"Name":    "Fbarray autotest",
 	"Command": "fbarray_autotest",
@@ -731,6 +743,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie_perf",
+        "Command": "pie_perf",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Lpm6 perf autotest",
         "Command": "lpm6_perf_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index a7611686ad..f224b0c17e 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -111,6 +111,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -241,6 +242,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -292,6 +294,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -305,6 +308,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..8521387ee0
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/* Structures for testing rte_pie performance and functionality */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+	                                 * (milliseconds)
+	                                 */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Master test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
/* Precompute 1/(cycles per byte) for a 10 Gbit/s port so get_port_ts() can
 * turn a TSC reading into a byte-time with a multiply instead of a divide.
 */
static void init_port_ts(uint64_t cpu_clock)
{
	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
	inv_cycles_per_byte = 1.0 / cycles_per_byte;
}
+
/* Current time expressed in "bytes at port speed" units (see init_port_ts) */
static uint64_t get_port_ts(void)
{
	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
}
+
/* Reset the profiling counters; clk_min starts at UINT64_MAX so the first
 * recorded sample always replaces it. clk_start is set by rdtsc_prof_start().
 */
static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
{
	p->clk_min = (uint64_t)(-1LL);
	p->clk_max = 0;
	p->clk_avg = 0;
	p->clk_avgc = 0;
	p->name = name;
}
+
/* Record the start timestamp with a serialising TSC read */
static inline void rdtsc_prof_start(struct rdtsc_prof *p)
{
	p->clk_start = rte_rdtsc_precise();
}
+
/* Fold one elapsed-cycle sample into the min/max/average statistics.
 * NOTE(review): the start stamp uses rte_rdtsc_precise() but the end uses
 * plain rte_rdtsc() - confirm the asymmetry is intentional. Also note the
 * local "clk_start" actually holds the elapsed cycle count, not a start time.
 */
static inline void rdtsc_prof_end(struct rdtsc_prof *p)
{
	uint64_t clk_start = rte_rdtsc() - p->clk_start;

	p->clk_avgc++;
	p->clk_avg += (double) clk_start;

	if (clk_start > p->clk_max)
		p->clk_max = clk_start;
	if (clk_start < p->clk_min)
		p->clk_min = clk_start;
}
+
/* Print n/min/max/average cycle statistics; silent when no samples taken */
static void rdtsc_prof_print(struct rdtsc_prof *p)
{
	if (p->clk_avgc > 0) {
		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
						",max=%" PRIu64 ", avg=%.1f\n",
			p->name,
			p->clk_avgc,
			p->clk_min,
			p->clk_max,
			(p->clk_avg / ((double) p->clk_avgc)));
	}
}
+
/* Read the PIE activation flag; the config argument is unused by this
 * accessor and only kept for a uniform getter signature.
 */
static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
				    struct rte_pie *pie)
{
	RTE_SET_USED(pie_cfg);
	return pie->active;
}
+
/* Set the PIE activation flag (1 = active, 0 = inactive); the config
 * argument is unused and only kept for a uniform setter signature.
 */
static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
					struct rte_pie *pie,
					uint16_t active)
{
	RTE_SET_USED(pie_cfg);
	pie->active = active;
}
+
/**
 * Read the current packet drop probability from the PIE run-time data.
 * The config argument is unused by this accessor.
 */
static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
				    struct rte_pie *pie)
{
	RTE_SET_USED(pie_cfg);
	return pie->drop_prob;
}
+
/* Read the average dequeue time from the PIE run-time data.
 * (The original comment said "drop probability", which was wrong.)
 */
static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
				    struct rte_pie *pie)
{
	RTE_SET_USED(pie_cfg);
	return pie->avg_dq_time;
}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
/**
 * Check whether the measured drop rate matches the drop probability within
 * tolerance (a percentage of drop_prob).
 *
 * The original used "(int)abs_diff == 0" to detect a zero difference, but
 * both rate and probability are fractions in [0, 1], so the int cast
 * truncated every difference to 0 and the check always passed. Compare
 * against exact zero instead, and guard the division when drop_prob is 0.
 *
 * @param diff [out] relative difference in percent of drop_prob
 * @return 1 when within tolerance, 0 otherwise
 */
static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
							double tolerance)
{
	double abs_diff = fabs(drop_rate - drop_prob);
	int ret = 1;

	if (abs_diff == 0.0) {
		*diff = 0.0;
	} else if (drop_prob == 0.0) {
		/* non-zero rate against zero probability: full mismatch */
		*diff = 100.0;
		if (*diff > tolerance)
			ret = 0;
	} else {
		*diff = (abs_diff / drop_prob) * 100.0;
		if (*diff > tolerance)
			ret = 0;
	}
	return ret;
}
+
/**
 * Initialise every PIE configuration under test and zero the shared
 * queue-length and statistics counters.
 *
 * @param tcfg [in,out] master test configuration
 * @return PASS on success, FAIL when any rte_pie_config_init() call fails
 */
static enum test_result
test_rte_pie_init(struct test_config *tcfg)
{
	unsigned int i = 0;

	tcfg->tvar->clk_freq = rte_get_timer_hz();
	init_port_ts(tcfg->tvar->clk_freq);

	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
					(uint16_t)tcfg->tconfig->qdelay_ref,
					(uint16_t)tcfg->tconfig->dp_update_interval[i],
					(uint16_t)tcfg->tconfig->max_burst[i],
					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
			return FAIL;
		}
	}

	/* only the first element of each counter array is used by the tests */
	*tcfg->tqueue->qlen = 0;
	*tcfg->tvar->dropped = 0;
	*tcfg->tvar->enqueued = 0;

	return PASS;
}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
/**
 * Functional helper: enqueue num_ops minimum-size packets and count how
 * many were accepted versus dropped by PIE.
 *
 * @param enqueued [out] incremented once per accepted packet
 * @param dropped [out] incremented once per rejected packet
 */
static void
enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
					struct rte_pie *pie,
					uint32_t *qlen,
					uint32_t num_ops,
					uint32_t *enqueued,
					uint32_t *dropped)
{
	uint32_t i = 0;

	for (i = 0; i < num_ops; i++) {
		int ret = 0;

		/* rte_pie_enqueue() returns 0 when the packet is admitted */
		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
							get_port_ts());
		if (ret == 0)
			(*enqueued)++;
		else
			(*dropped)++;
	}
}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {10, 20, 50, 150, 300, 600,
+											900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
/**
 * Functional test 2: iterate over several PIE configurations and compare
 * the measured drop rate against the drop probability reported by PIE.
 */
static enum test_result func_test2(struct test_config *tcfg)
{
	enum test_result result = PASS;
	uint32_t i = 0;

	printf("%s", tcfg->msg);

	printf("%s", tcfg->htxt);

	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
		uint32_t avg = 0;
		double drop_rate = 0.0;
		double drop_prob = 0.0;
		double diff = 0.0;

		/* re-initialise configs and counters for each configuration */
		if (test_rte_pie_init(tcfg) != PASS) {
			result = FAIL;
			goto out;
		}

		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
		*tcfg->tvar->enqueued = 0;
		*tcfg->tvar->dropped = 0;

		if (increase_qsize(&tcfg->tconfig->pconfig[i],
					tcfg->tqueue->pdata_in,
					tcfg->tqueue->qlen,
					*tcfg->tlevel,
					tcfg->tqueue->q_ramp_up) != 0) {
			result = FAIL;
			goto out;
		}

		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
				     tcfg->tqueue->pdata_in,
				     tcfg->tqueue->qlen,
				     tcfg->tvar->num_ops,
				     tcfg->tvar->enqueued,
				     tcfg->tvar->dropped);

		/* NOTE(review): avg_dq_time is a double but is stored into a
		 * uint32_t for display - confirm the truncation is acceptable
		 */
		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);

		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
							*tcfg->tvar->dropped);
		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);

		if (!check_drop_rate(&diff, drop_rate, drop_prob,
				 (double)tcfg->tqueue->drop_tolerance)) {
			fprintf(stderr, "Fail: drop rate outside tolerance\n");
			result = FAIL;
		}

		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
				drop_prob * 100.0, drop_rate * 100.0, diff,
				(double)tcfg->tqueue->drop_tolerance);
	}
out:
	return result;
}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
/**
 * Performance helper: time enqueue (and optionally dequeue) per packet,
 * mimicking
 *   rte_sched_port_enqueue(port, in_mbufs, 10);
 *   rte_sched_port_dequeue(port, out_mbufs, 10);
 *
 * @param pie_out [in] dequeue-side run-time data; when NULL only the
 *   enqueue path is exercised and *dequeued is never written
 * @param prof [in,out] rdtsc profiler accumulating per-call cycle counts
 */
static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
				 struct rte_pie *pie_in,
				 struct rte_pie *pie_out,
				 uint32_t *qlen,
				 uint32_t num_ops,
				 uint32_t *enqueued,
				 uint32_t *dropped,
				 uint32_t *dequeued,
				 struct rdtsc_prof *prof)
{
	uint32_t i = 0;

	if (pie_cfg == NULL) {
		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
		return;
	}

	if (pie_in == NULL) {
		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
		return;
	}

	for (i = 0; i < num_ops; i++) {
		uint64_t ts = 0;
		int ret = 0;

		/* timed enqueue of a 4000-byte packet */
		ts = get_port_ts();
		rdtsc_prof_start(prof);
		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
								1000*sizeof(uint32_t), ts);
		rdtsc_prof_end(prof);

		if (ret == 0)
			(*enqueued)++;
		else
			(*dropped)++;

		/* timed dequeue, only when a dequeue-side context exists */
		if (pie_out != NULL) {
			ts = get_port_ts();
			rdtsc_prof_start(prof);
			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
			rdtsc_prof_end(prof);

			(*dequeued)++;
		}
	}
}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
/**
 * Performance test 1: measure the enqueue-only cost of rte_pie_enqueue().
 *
 * @return PASS unless initialisation fails (the measurement itself does
 *   not fail the test)
 */
static enum test_result perf_test(struct test_config *tcfg)
{
	enum test_result result = PASS;
	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
	uint32_t total = 0;

	printf("%s", tcfg->msg);

	rdtsc_prof_init(&prof, "enqueue");

	if (test_rte_pie_init(tcfg) != PASS) {
		result = FAIL;
		goto out;
	}

	/* initialize the rte_pie run time data structure */
	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
	*tcfg->tvar->enqueued = 0;
	*tcfg->tvar->dropped = 0;

	/* pie_out is NULL so the dequeued pointer is never written;
	 * NOTE(review): perf1_tvar leaves .dequeued NULL - safe only while
	 * that invariant holds, confirm if the helper ever changes
	 */
	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
			     tcfg->tqueue->pdata_in,
				 NULL,
			     tcfg->tqueue->qlen,
			     tcfg->tvar->num_ops,
			     tcfg->tvar->enqueued,
			     tcfg->tvar->dropped,
				 tcfg->tvar->dequeued,
			     &prof);

	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;

	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
			total, *tcfg->tvar->enqueued,
			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
			*tcfg->tvar->dropped,
			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);

	rdtsc_prof_print(&prof);
out:
	return result;
}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
/**
 * Performance test 2: measure combined enqueue and dequeue cost.
 *
 * @return PASS unless initialisation fails
 */
static enum test_result perf_test2(struct test_config *tcfg)
{
	enum test_result result = PASS;
	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
	uint32_t total = 0;

	printf("%s", tcfg->msg);

	rdtsc_prof_init(&prof, "enqueue");

	if (test_rte_pie_init(tcfg) != PASS) {
		result = FAIL;
		goto out;
	}

	/* initialize the rte_pie run time data structure */
	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
	*tcfg->tvar->enqueued = 0;
	*tcfg->tvar->dequeued = 0;
	*tcfg->tvar->dropped = 0;

	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
				 tcfg->tqueue->pdata_in,
				 tcfg->tqueue->pdata_out,
				 tcfg->tqueue->qlen,
				 tcfg->tvar->num_ops,
				 tcfg->tvar->enqueued,
				 tcfg->tvar->dropped,
				 tcfg->tvar->dequeued,
				 &prof);

	/* total counts enqueue attempts; the dequeued percentage below is
	 * reported relative to that same total
	 */
	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;

	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
			total, *tcfg->tvar->dequeued,
			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
			*tcfg->tvar->dropped,
			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);

	rdtsc_prof_print(&prof);
out:
	return result;
}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_tests - num_pass);
+}
+
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	return (num_pass == num_tests) ? 0 : 1;
+}
+
/**
 * pie_autotest entry point: invalid-parameter checks followed by the
 * quick functional test list.
 *
 * @return 0 when every test passed, non-zero otherwise
 */
static int
test_pie(void)
{
	uint32_t num_tests = 0;
	uint32_t num_pass = 0;

	if (test_invalid_parameters() < 0)
		return -1;

	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
		  &num_tests, &num_pass);
	show_stats(num_tests, num_pass);
	return tell_the_result(num_tests, num_pass);
}
+
/**
 * pie_perf entry point: run only the performance test list.
 *
 * @return 0 when every test passed, non-zero otherwise
 */
static int
test_pie_perf(void)
{
	uint32_t num_tests = 0;
	uint32_t num_pass = 0;

	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
	show_stats(num_tests, num_pass);
	return tell_the_result(num_tests, num_pass);
}
+
/**
 * pie_all entry point: invalid-parameter checks, full functional list,
 * then the performance list.
 *
 * @return 0 when every test passed, non-zero otherwise
 */
static int
test_pie_all(void)
{
	uint32_t num_tests = 0;
	uint32_t num_pass = 0;

	if (test_invalid_parameters() < 0)
		return -1;

	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
	show_stats(num_tests, num_pass);
	return tell_the_result(num_tests, num_pass);
}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..d9cf61e04c 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index 320435ed91..480b6e531d 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -1877,7 +1877,7 @@ rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
 	struct rte_pie *pie = &qe->pie;
 
-	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management
  2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-09-07 19:14             ` Stephen Hemminger
  2021-09-08  8:49               ` Liguzinski, WojciechX
  2021-10-14 15:13               ` Liguzinski, WojciechX
  0 siblings, 2 replies; 178+ messages in thread
From: Stephen Hemminger @ 2021-09-07 19:14 UTC (permalink / raw)
  To: Liguzinski, WojciechX
  Cc: dev, jasvinder.singh, cristian.dumitrescu, megha.ajmera

On Tue,  7 Sep 2021 07:33:24 +0000
"Liguzinski, WojciechX" <wojciechx.liguzinski@intel.com> wrote:

> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param time [in] current time (measured in cpu cycles)
> + */
> +static inline void
> +__rte_experimental
> +_calc_drop_probability(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie, uint64_t time)

This code adds a lot of inline functions in the name of performance.
But every inline like this means the internal ABI for the implementation
has to be exposed.

You would probably get a bigger performance bump from not using floating
point in the internal math, than the minor performance optimization from
having so many inlines.

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management
  2021-09-07 19:14             ` Stephen Hemminger
@ 2021-09-08  8:49               ` Liguzinski, WojciechX
  2021-10-14 15:13               ` Liguzinski, WojciechX
  1 sibling, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-08  8:49 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Singh, Jasvinder, Dumitrescu, Cristian, Ajmera, Megha

Thanks Stephen,

I will do my best to apply your comments.

Best Regards,
Wojciech Liguzinski

-----Original Message-----
From: Stephen Hemminger <stephen@networkplumber.org> 
Sent: Tuesday, September 7, 2021 9:15 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
Cc: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Ajmera, Megha <megha.ajmera@intel.com>
Subject: Re: [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management

On Tue,  7 Sep 2021 07:33:24 +0000
"Liguzinski, WojciechX" <wojciechx.liguzinski@intel.com> wrote:

> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param time [in] current time (measured in cpu cycles)  */ static 
> +inline void __rte_experimental _calc_drop_probability(const struct 
> +rte_pie_config *pie_cfg,
> +	struct rte_pie *pie, uint64_t time)

This code adds a lot of inline functions in the name of performance.
But every inline like this means the internal ABI for the implementation has to be exposed.

You would probably get a bigger performance bump from not using floating point in the internal math, than the minor performance optimization from having so many inlines.

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v7 0/5] Add PIE support for HQoS library
  2021-09-07 14:11           ` [dpdk-dev] [RFC PATCH v6 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                               ` (4 preceding siblings ...)
  2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-09-22  7:46             ` Liguzinski, WojciechX
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                 ` (5 more replies)
  5 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-22  7:46 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency 
variation. Currently, it supports RED for active queue management (which is designed 
to control the queue length but it does not control latency directly and is now being 
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address 
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and 
adding a new set of data structures to the library, adding PIE related APIs. 
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/autotest_data.py                    |   18 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |    6 +-
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |   82 +-
 examples/qos_sched/init.c                    |    7 +-
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  228 ++--
 lib/sched/rte_sched.h                        |   53 +-
 lib/sched/version.map                        |    3 +
 19 files changed, 2050 insertions(+), 190 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v7 1/5] sched: add PIE based congestion management
  2021-09-22  7:46             ` [dpdk-dev] [RFC PATCH v7 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-09-22  7:46               ` Liguzinski, WojciechX
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                 ` (4 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-22  7:46 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 228 +++++++++----
 lib/sched/rte_sched.h                    |  53 ++-
 lib/sched/version.map                    |   3 +
 7 files changed, 685 insertions(+), 90 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..5b6c4e6d4b 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->wred_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (in cpu cycles) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..320435ed91 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_aqm_mode aqm;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_AQM
+
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->wred_params[i][j].min_th |
+				 params->wred_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->wred_config[i][j],
+				params->wred_params[i][j].wq_log2,
+				params->wred_params[i][j].min_th,
+				params->wred_params[i][j].max_th,
+				params->wred_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->aqm = RTE_SCHED_AQM_PIE;
+	return 0;
+}
+
+static int
+rte_sched_aqm_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->aqm == RTE_SCHED_AQM_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->aqm == RTE_SCHED_AQM_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_AQM
+		status = rte_sched_aqm_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_AQM
+	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
 #endif
 }
 
@@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_AQM
+	qe->stats.n_pkts_aqm_dropped += drops;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->wred_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1815,14 +1885,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red *red = &qe->red;
+
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
 
-	rte_red_mark_queue_empty(red, port->time);
+	/* Update queue length */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_AQM
+	if (subport->aqm == RTE_SCHED_AQM_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..a5fe6266cd 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Active Queue Management */
+#ifdef RTE_SCHED_AQM
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Active Queue Management (AQM) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer to RFC 8033.
+ */
+enum rte_sched_aqm_mode {
+	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/** Active Queue Management mode */
+	enum rte_sched_aqm_mode aqm;
+
+	RTE_STD_C11
+	union {
+		/** WRED parameters */
+		struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_AQM
+	/** Number of packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_AQM
+	/** Packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped;
 #endif
 
 	/** Bytes successfully written */
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v7 2/5] example/qos_sched: add PIE support
  2021-09-22  7:46             ` [dpdk-dev] [RFC PATCH v7 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-09-22  7:46               ` Liguzinski, WojciechX
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                 ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-22  7:46 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling PIE or RED by
parsing the config file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |   7 +-
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 5 files changed, 200 insertions(+), 87 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..657763ca90 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	enum rte_sched_aqm_mode aqm_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		aqm_mode = RTE_SCHED_AQM_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		aqm_mode = RTE_SCHED_AQM_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_AQM */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
+			subport_params[i].aqm = aqm_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].aqm == RTE_SCHED_AQM_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].wred_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].wred_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].wred_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].wred_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].pie_params[j].qdelay_ref =
+						pie_params[j].qdelay_ref;
+					subport_params[i].pie_params[j].dp_update_interval =
+						pie_params[j].dp_update_interval;
+					subport_params[i].pie_params[j].max_burst =
+						pie_params[j].max_burst;
+					subport_params[i].pie_params[j].tailq_th =
+						pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..96ba3b6616 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -212,8 +212,9 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
-	.red_params = {
+#ifdef RTE_SCHED_AQM
+	.aqm = RTE_SCHED_AQM_WRED,
+	.wred_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +280,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 	},
 };
 
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v7 3/5] example/ip_pipeline: add PIE support
  2021-09-22  7:46             ` [dpdk-dev] [RFC PATCH v7 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-09-22  7:46               ` Liguzinski, WojciechX
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                 ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-22  7:46 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Add PIE support to the IP Pipeline example application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..73da2da870 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,8 +25,8 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
+#ifdef RTE_SCHED_AQM
+.wred_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v7 4/5] doc/guides/prog_guide: added PIE
  2021-09-22  7:46             ` [dpdk-dev] [RFC PATCH v7 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                 ` (2 preceding siblings ...)
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-09-22  7:46               ` Liguzinski, WojciechX
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-09-23  9:45               ` [dpdk-dev] [RFC PATCH v8 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-22  7:46 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in the RFC 8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady-state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+These parameters are specified in the format expected by the dropper module API.
+Applications may also compute them dynamically for fine tuning.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. Calculated drop probability is updated periodically,
+based on latency measured and desired and whether the queuing latency is currently
+trending up or down. Queuing latency can be obtained using direct measurement or
+on estimations calculated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v7 5/5] app/test: add tests for PIE
  2021-09-22  7:46             ` [dpdk-dev] [RFC PATCH v7 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                 ` (3 preceding siblings ...)
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-09-22  7:46               ` Liguzinski, WojciechX
  2021-09-23  9:45               ` [dpdk-dev] [RFC PATCH v8 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-22  7:46 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/autotest_data.py |   18 +
 app/test/meson.build      |    4 +
 app/test/test_pie.c       | 1065 +++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c       |    6 +-
 lib/sched/rte_pie.h       |    9 +-
 lib/sched/rte_sched.c     |    2 +-
 6 files changed, 1100 insertions(+), 4 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 302d6374c1..1d4418b6a3 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -279,6 +279,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie autotest",
+        "Command": "pie_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "PMD ring autotest",
         "Command": "ring_pmd_autotest",
@@ -525,6 +531,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie all",
+        "Command": "pie_all",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
 	"Name":    "Fbarray autotest",
 	"Command": "fbarray_autotest",
@@ -731,6 +743,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie_perf",
+        "Command": "pie_perf",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Lpm6 perf autotest",
         "Command": "lpm6_perf_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index a7611686ad..f224b0c17e 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -111,6 +111,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -241,6 +242,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -292,6 +294,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -305,6 +308,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..6fae55edfd
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					 * (milliseconds)
+					 */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+/**
+ * Test helper: read the PIE activation flag from the run-time data.
+ * The config pointer is unused; it is kept only so all accessors share
+ * the same (config, runtime) signature.
+ */
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	/* Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+/**
+ * Test helper: set (activate/deactivate) the PIE activation flag.
+ */
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+	/* Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Test helper: read the current packet drop probability.
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	/* Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+/**
+ * Test helper: read the average dequeue time tracked by the PIE
+ * run-time data. (The previous comment here, "Current packet drop
+ * probability", was copied from rte_pie_get_drop_prob by mistake.)
+ */
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	/* Average dequeue time, not drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
/**
 * Check whether the measured drop rate matches the drop probability
 * within a relative tolerance.
 *
 * @param diff [out] relative difference in percent (0 when identical)
 * @param drop_rate measured drop rate (fraction, 0..1)
 * @param drop_prob expected drop probability (fraction, 0..1)
 * @param tolerance allowed relative difference, in percent
 * @return 1 when within tolerance, 0 otherwise
 */
static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
							double tolerance)
{
	double abs_diff = fabs(drop_rate - drop_prob);
	int ret = 1;

	/*
	 * Fixed: the original tested "(int)abs_diff == 0", which truncates
	 * any difference below 1.0 to zero. Since both rates are fractions
	 * in [0, 1], the tolerance check effectively never ran. Compare
	 * against 0.0 instead, and guard the division when drop_prob is 0.
	 */
	if (abs_diff == 0.0) {
		*diff = 0.0;
	} else if (drop_prob == 0.0) {
		/* Non-zero rate against zero probability: always a failure. */
		*diff = 100.0;
		ret = 0;
	} else {
		*diff = (abs_diff / drop_prob) * 100.0;
		if (*diff > tolerance)
			ret = 0;
	}
	return ret;
}
+
+/**
+ * Initialise every PIE configuration under test and zero the shared
+ * counters.
+ *
+ * Reads the CPU timer frequency, derives the cycles-per-byte conversion
+ * used by get_port_ts(), then calls rte_pie_config_init() once per
+ * parameter set (num_cfg of them).
+ *
+ * @return PASS on success, FAIL if any rte_pie_config_init() call fails
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * Ramp the queue up: repeatedly attempt to enqueue a pkt_len-sized packet
+ * until rte_pie_enqueue() accepts one (returns 0), giving up after
+ * 'attempts' tries.
+ *
+ * NOTE(review): *qlen is passed to rte_pie_enqueue() but never updated
+ * here - presumably the enqueue path tracks queue state internally;
+ * confirm against rte_pie.h.
+ *
+ * @return 0 once an enqueue succeeds, -1 if every attempt was refused
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * Functional enqueue loop: perform num_ops enqueues of uint32_t-sized
+ * packets through rte_pie_enqueue(), counting accepted packets in
+ * *enqueued and refused ones in *dropped. Counters are not reset here;
+ * callers zero them before the run.
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+/**
+ * Functional test 1: run enqueue traffic over increasing levels against
+ * a single PIE configuration and verify that, with the queue held short,
+ * neither the reported drop probability nor the measured drop rate
+ * becomes non-zero.
+ */
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/*
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/*
+		 * Fixed: this test has a single PIE configuration
+		 * (ft_tconfig.num_cfg == 1) but the loop indexed
+		 * pconfig[i] for i up to RTE_DIM(ft_tlevels)-1, reading
+		 * past the end of the array. Always use pconfig[0].
+		 */
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] =
+				{10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+/**
+ * Functional test 2: run the same traffic through each of the ten PIE
+ * configurations (num_cfg == 10, so pconfig[i] is in bounds here) and
+ * compare the measured drop rate against the reported drop probability
+ * within the configured tolerance.
+ */
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		/* NOTE(review): the average dequeue time is returned as a
+		 * double but stored into a uint32_t, silently truncating the
+		 * fraction - acceptable for display, but worth confirming.
+		 */
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		/* drop_tolerance is 0 here (ft_tqueue), so any relative
+		 * difference between rate and probability fails the check.
+		 */
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	/* Fixed: the message said "functional test 2" but this structure
+	 * configures functional test 3 (non-zero initial qlen).
+	 */
+	.msg = "functional test 3 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+/**
+ * Functional test 3: same checks as functional test 1 but starting from a
+ * non-zero queue length (ft3_qlen), over this test's own traffic levels.
+ */
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/*
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	/*
+	 * Fixed: the loop bound used RTE_DIM(ft_tlevels) (24, test 1's
+	 * table) although this test's own table is ft3_tlevels (3 levels),
+	 * and pconfig[i] indexed past the single-element config array.
+	 */
+	for (i = 0; i < RTE_DIM(ft3_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * Performance loop: enqueue num_ops packets of 1000*sizeof(uint32_t)
+ * bytes through rte_pie_enqueue(), and, when pie_out is non-NULL, pair
+ * each enqueue with an rte_pie_dequeue() call. Both calls are timed via
+ * the rdtsc profiler; *enqueued / *dropped / *dequeued accumulate totals.
+ *
+ * This mirrors the sched path:
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		/* Dequeue timing is optional: enabled by a non-NULL pie_out. */
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test 1: time the enqueue path only (pie_out is NULL, so
+ * enqueue_dequeue_perf() skips the dequeue leg), then print the
+ * enqueued/dropped breakdown and the rdtsc statistics.
+ *
+ * NOTE(review): result can only be FAIL when init fails; the measurement
+ * itself is never checked - presumably intentional for a perf test.
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test 2: time both the enqueue and the dequeue path
+ * (pie_out is pt_wtdata, so each enqueue is paired with a dequeue).
+ *
+ * NOTE(review): 'total' is enqueued + dropped, but the percentages below
+ * are computed from the dequeued count against that total - looks
+ * inherited from perf_test(); confirm this is the intended report.
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * Execute test_count entries from the test_type table, printing a
+ * pass/fail banner after each one and updating the caller's totals.
+ *
+ * @param test_type array of {config, function} test descriptors
+ * @param test_count number of entries to run
+ * @param num_tests [in,out] incremented once per executed test
+ * @param num_pass [in,out] incremented once per passing test
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ *
+ * Fixed: four messages used the format "%i%s" (line number fused to the
+ * text); all now use "%i: %s" like the first check.
+ *
+ * @return 0 when every invalid call is rejected, -1 otherwise
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL runtime data */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_tests - num_pass);
+}
+
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	return (num_pass == num_tests) ? 0 : 1;
+}
+
+/**
+ * pie_autotest entry point: invalid-parameter checks plus the quick
+ * functional set (func_pie_tests_quick).
+ */
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/**
+ * pie_perf entry point: performance tests only, no parameter checks.
+ */
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/**
+ * pie_all entry point: invalid-parameter checks, the full functional
+ * set and the performance set.
+ */
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..d9cf61e04c 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index 320435ed91..480b6e531d 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -1877,7 +1877,7 @@ rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
 	struct rte_pie *pie = &qe->pie;
 
-	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v8 0/5] Add PIE support for HQoS library
  2021-09-22  7:46             ` [dpdk-dev] [RFC PATCH v7 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                 ` (4 preceding siblings ...)
  2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-09-23  9:45               ` Liguzinski, WojciechX
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                   ` (5 more replies)
  5 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-23  9:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

The DPDK sched library is equipped with a mechanism that protects it from the bufferbloat problem,
which is a situation in which excess buffering in the network causes high latency and latency
variation. Currently, it supports RED for active queue management (RED is designed
to control the queue length, but it does not control latency directly and is now being
obsoleted). However, more advanced queue management is required to address this problem
and provide the desired quality of service to users.

This solution (RFC) proposes the use of a new algorithm called "PIE" (Proportional Integral
controller Enhanced), which can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of the mentioned functionality includes modifying existing data structures
and adding a new set of data structures to the library, as well as adding PIE-related APIs.
This affects structures in the public API/ABI. That is why a deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/autotest_data.py                    |   18 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |    6 +-
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |   82 +-
 examples/qos_sched/init.c                    |    7 +-
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  228 ++--
 lib/sched/rte_sched.h                        |   53 +-
 lib/sched/version.map                        |    3 +
 19 files changed, 2050 insertions(+), 190 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v8 1/5] sched: add PIE based congestion management
  2021-09-23  9:45               ` [dpdk-dev] [RFC PATCH v8 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-09-23  9:45                 ` Liguzinski, WojciechX
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-23  9:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE-based congestion management as described in RFC 8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 228 +++++++++----
 lib/sched/rte_sched.h                    |  53 ++-
 lib/sched/version.map                    |   3 +
 7 files changed, 685 insertions(+), 90 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..5b6c4e6d4b 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->wred_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..320435ed91 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_aqm_mode aqm;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_AQM
+
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->wred_params[i][j].min_th |
+				 params->wred_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->wred_config[i][j],
+				params->wred_params[i][j].wq_log2,
+				params->wred_params[i][j].min_th,
+				params->wred_params[i][j].max_th,
+				params->wred_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->aqm = RTE_SCHED_AQM_PIE;
+	return 0;
+}
+
+static int
+rte_sched_aqm_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->aqm == RTE_SCHED_AQM_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->aqm == RTE_SCHED_AQM_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_AQM
+		status = rte_sched_aqm_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_AQM
+	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
 #endif
 }
 
@@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_AQM
+	qe->stats.n_pkts_aqm_dropped += drops;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->wred_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1815,14 +1885,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red *red = &qe->red;
+
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
 
-	rte_red_mark_queue_empty(red, port->time);
+	/* Update queue length */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_AQM
+	if (subport->aqm == RTE_SCHED_AQM_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..a5fe6266cd 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Active Queue Management */
+#ifdef RTE_SCHED_AQM
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Active Queue Management (AQM) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_aqm_mode {
+	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/** Active Queue Management mode */
+	enum rte_sched_aqm_mode aqm;
+
+	RTE_STD_C11
+	union {
+		/** WRED parameters */
+		struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_AQM
+	/** Number of packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_AQM
+	/** Packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped;
 #endif
 
 	/** Bytes successfully written */
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v8 2/5] example/qos_sched: add PIE support
  2021-09-23  9:45               ` [dpdk-dev] [RFC PATCH v8 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-09-23  9:45                 ` Liguzinski, WojciechX
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-23  9:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling either PIE or RED by
parsing the config file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |   7 +-
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 5 files changed, 200 insertions(+), 87 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..657763ca90 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	enum rte_sched_aqm_mode aqm_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		aqm_mode = RTE_SCHED_AQM_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		aqm_mode = RTE_SCHED_AQM_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_AQM */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
+			subport_params[i].aqm = aqm_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].aqm == RTE_SCHED_AQM_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].wred_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].wred_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].wred_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].wred_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].pie_params[j].qdelay_ref =
+						pie_params[j].qdelay_ref;
+					subport_params[i].pie_params[j].dp_update_interval =
+						pie_params[j].dp_update_interval;
+					subport_params[i].pie_params[j].max_burst =
+						pie_params[j].max_burst;
+					subport_params[i].pie_params[j].tailq_th =
+						pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..96ba3b6616 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -212,8 +212,9 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
-	.red_params = {
+#ifdef RTE_SCHED_AQM
+	.aqm = RTE_SCHED_AQM_WRED,
+	.wred_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +280,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 	},
 };
 
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v8 3/5] example/ip_pipeline: add PIE support
  2021-09-23  9:45               ` [dpdk-dev] [RFC PATCH v8 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-09-23  9:45                 ` Liguzinski, WojciechX
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-23  9:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..73da2da870 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,8 +25,8 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
+#ifdef RTE_SCHED_AQM
+.wred_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v8 4/5] doc/guides/prog_guide: added PIE
  2021-09-23  9:45               ` [dpdk-dev] [RFC PATCH v8 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                   ` (2 preceding siblings ...)
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-09-23  9:45                 ` Liguzinski, WojciechX
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-10-11  7:55                 ` [dpdk-dev] [PATCH v9 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-23  9:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady-state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: packets            |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+The format of these parameters is as specified by the dropper module API.
+They could be made self-calculated for fine tuning, within the apps.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and desired latency and on whether the queuing latency is
+currently trending up or down. Queuing latency can be obtained by direct measurement
+or estimated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [RFC PATCH v8 5/5] app/test: add tests for PIE
  2021-09-23  9:45               ` [dpdk-dev] [RFC PATCH v8 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                   ` (3 preceding siblings ...)
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-09-23  9:45                 ` Liguzinski, WojciechX
  2021-10-11  7:55                 ` [dpdk-dev] [PATCH v9 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-09-23  9:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/autotest_data.py |   18 +
 app/test/meson.build      |    4 +
 app/test/test_pie.c       | 1065 +++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c       |    6 +-
 lib/sched/rte_pie.h       |   17 +-
 lib/sched/rte_sched.c     |    2 +-
 6 files changed, 1104 insertions(+), 8 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 302d6374c1..1d4418b6a3 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -279,6 +279,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie autotest",
+        "Command": "pie_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "PMD ring autotest",
         "Command": "ring_pmd_autotest",
@@ -525,6 +531,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie all",
+        "Command": "red_all",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
 	"Name":    "Fbarray autotest",
 	"Command": "fbarray_autotest",
@@ -731,6 +743,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie_perf",
+        "Command": "pie_perf",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Lpm6 perf autotest",
         "Command": "lpm6_perf_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index a7611686ad..f224b0c17e 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -111,6 +111,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -241,6 +242,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -292,6 +294,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -305,6 +308,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_tests - num_pass);
+}
+
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	return (num_pass == num_tests) ? 0 : 1;
+}
+
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index 320435ed91..480b6e531d 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -1877,7 +1877,7 @@ rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
 	struct rte_pie *pie = &qe->pie;
 
-	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v9 0/5] Add PIE support for HQoS library
  2021-09-23  9:45               ` [dpdk-dev] [RFC PATCH v8 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                   ` (4 preceding siblings ...)
  2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-11  7:55                 ` Liguzinski, WojciechX
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                     ` (5 more replies)
  5 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-11  7:55 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

The DPDK sched library is equipped with a mechanism that protects it from the bufferbloat
problem, which is a situation where excess buffers in the network cause high latency and latency 
variation. Currently, it supports RED for active queue management (which is designed 
to control the queue length but it does not control latency directly and is now being 
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address 
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and 
adding a new set of data structures to the library, adding PIE related APIs. 
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/autotest_data.py                    |   18 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |    6 +-
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |   82 +-
 examples/qos_sched/init.c                    |    7 +-
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  228 ++--
 lib/sched/rte_sched.h                        |   53 +-
 lib/sched/version.map                        |    3 +
 19 files changed, 2050 insertions(+), 190 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v9 1/5] sched: add PIE based congestion management
  2021-10-11  7:55                 ` [dpdk-dev] [PATCH v9 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-10-11  7:55                   ` Liguzinski, WojciechX
  2021-10-12 15:59                     ` Dumitrescu, Cristian
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-11  7:55 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on RFC 8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 228 +++++++++----
 lib/sched/rte_sched.h                    |  53 ++-
 lib/sched/version.map                    |   3 +
 7 files changed, 685 insertions(+), 90 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..5b6c4e6d4b 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->wred_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start the measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (in cpu cycles; compared against qdelay_ref) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target (milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..320435ed91 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,13 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	enum rte_sched_aqm_mode aqm;
+#ifdef RTE_SCHED_AQM
+	RTE_STD_C11
+	union {
+		struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_AQM
+
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->wred_params[i][j].min_th |
+				 params->wred_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->wred_config[i][j],
+				params->wred_params[i][j].wq_log2,
+				params->wred_params[i][j].min_th,
+				params->wred_params[i][j].max_th,
+				params->wred_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->aqm = RTE_SCHED_AQM_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->pie_params[i].qdelay_ref,
+			params->pie_params[i].dp_update_interval,
+			params->pie_params[i].max_burst,
+			params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->aqm = RTE_SCHED_AQM_PIE;
+	return 0;
+}
+
+static int
+rte_sched_aqm_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->aqm == RTE_SCHED_AQM_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->aqm == RTE_SCHED_AQM_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_AQM
+		status = rte_sched_aqm_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
+#ifdef RTE_SCHED_AQM
+	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
 #endif
 }
 
@@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t drops)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_AQM
+	qe->stats.n_pkts_aqm_dropped += drops;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->wred_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
 }
 
 static inline void
@@ -1815,14 +1885,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->aqm == RTE_SCHED_AQM_WRED) {
+		struct rte_red *red = &qe->red;
+
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+	struct rte_pie *pie = &qe->pie;
 
-	rte_red_mark_queue_empty(red, port->time);
+	/* Update queue length */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 
 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
+
 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+#ifdef RTE_SCHED_AQM
+	if (subport->aqm == RTE_SCHED_AQM_PIE)
+		rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+#endif
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..a5fe6266cd 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Active Queue Management */
+#ifdef RTE_SCHED_AQM
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Active Queue Management (AQM) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_aqm_mode {
+	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +197,17 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	/** Active Queue Management mode */
+	enum rte_sched_aqm_mode aqm;
+
+	RTE_STD_C11
+	union {
+		/** WRED parameters */
+		struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 };
 
@@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+#ifdef RTE_SCHED_AQM
+	/** Number of packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 #endif
 };
 
@@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
+#ifdef RTE_SCHED_AQM
+	/** Packets dropped by active queue management scheme */
+	uint64_t n_pkts_aqm_dropped;
 #endif
 
 	/** Bytes successfully written */
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v9 2/5] example/qos_sched: add PIE support
  2021-10-11  7:55                 ` [dpdk-dev] [PATCH v9 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-11  7:55                   ` Liguzinski, WojciechX
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-11  7:55 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling either PIE or RED congestion
management by parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |   7 +-
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 5 files changed, 200 insertions(+), 87 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..657763ca90 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_AQM
+	enum rte_sched_aqm_mode aqm_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		aqm_mode = RTE_SCHED_AQM_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		aqm_mode = RTE_SCHED_AQM_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_AQM */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_AQM
+			subport_params[i].aqm = aqm_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].aqm == RTE_SCHED_AQM_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].wred_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].wred_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].wred_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].wred_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].pie_params[j].qdelay_ref =
+						pie_params[j].qdelay_ref;
+					subport_params[i].pie_params[j].dp_update_interval =
+						pie_params[j].dp_update_interval;
+					subport_params[i].pie_params[j].max_burst =
+						pie_params[j].max_burst;
+					subport_params[i].pie_params[j].tailq_th =
+						pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..96ba3b6616 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -212,8 +212,9 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
-	.red_params = {
+#ifdef RTE_SCHED_AQM
+	.aqm = RTE_SCHED_AQM_WRED,
+	.wred_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +280,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 	},
 };
 
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v9 3/5] example/ip_pipeline: add PIE support
  2021-10-11  7:55                 ` [dpdk-dev] [PATCH v9 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-11  7:55                   ` Liguzinski, WojciechX
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-11  7:55 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..73da2da870 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,8 +25,8 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
+#ifdef RTE_SCHED_AQM
+.wred_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_AQM */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v9 4/5] doc/guides/prog_guide: added PIE
  2021-10-11  7:55                 ` [dpdk-dev] [PATCH v9 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                     ` (2 preceding siblings ...)
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-11  7:55                   ` Liguzinski, WojciechX
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-10-14 11:34                   ` [dpdk-dev] [PATCH v10 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-11  7:55 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample but also whether the latency is trending
+up or down. This is the classical Proportional Integral (PI) controller method, which is
+known for eliminating steady-state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+These parameters are specified in the format expected by the dropper module API.
+Applications may also compute them at runtime for fine tuning.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and desired latency and on whether the queuing latency is
+currently trending up or down. Queuing latency can be obtained by direct measurement
+or estimated from the queue length and dequeue rate. The random drop decision is
+made on a packet's arrival, before it is enqueued into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v9 5/5] app/test: add tests for PIE
  2021-10-11  7:55                 ` [dpdk-dev] [PATCH v9 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                     ` (3 preceding siblings ...)
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-11  7:55                   ` Liguzinski, WojciechX
  2021-10-14 11:34                   ` [dpdk-dev] [PATCH v10 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-11  7:55 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/autotest_data.py |   18 +
 app/test/meson.build      |    4 +
 app/test/test_pie.c       | 1065 +++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c       |    6 +-
 lib/sched/rte_pie.h       |   17 +-
 lib/sched/rte_sched.c     |    2 +-
 6 files changed, 1104 insertions(+), 8 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 302d6374c1..1d4418b6a3 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -279,6 +279,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie autotest",
+        "Command": "pie_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "PMD ring autotest",
         "Command": "ring_pmd_autotest",
@@ -525,6 +531,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie all",
+        "Command": "red_all",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
 	"Name":    "Fbarray autotest",
 	"Command": "fbarray_autotest",
@@ -731,6 +743,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie_perf",
+        "Command": "pie_perf",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Lpm6 perf autotest",
         "Command": "lpm6_perf_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index a7611686ad..f224b0c17e 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -111,6 +111,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -241,6 +242,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -292,6 +294,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -305,6 +308,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
/** Structures for testing rte_pie performance and functionality. */
struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
	uint16_t *dp_update_interval;   /**< Update interval for drop probability
					  * (milliseconds)
					  */
	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
};

struct test_queue {                 /**< Test structure for RTE_PIE Queues */
	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output */
	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
	uint32_t *qlen;                 /**< Queue size */
	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
};

struct test_var {                   /**< Test variables used for testing RTE_PIE */
	uint32_t num_iterations;        /**< Number of test iterations */
	uint32_t num_ops;               /**< Number of test operations */
	uint64_t clk_freq;              /**< CPU clock frequency */
	uint32_t *dropped;              /**< Test operations dropped */
	uint32_t *enqueued;             /**< Test operations enqueued */
	uint32_t *dequeued;             /**< Test operations dequeued */
};

struct test_config {                /**< Primary test structure for RTE_PIE */
	const char *ifname;             /**< Interface name */
	const char *msg;                /**< Test message for display */
	const char *htxt;               /**< Header txt display for result output */
	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
	uint32_t *tlevel;               /**< Queue levels */
};

/** Result of a single test function. */
enum test_result {
	FAIL = 0,
	PASS
};

/** Test structure to define tests to run: config plus its test function. */
struct tests {
	struct test_config *testcfg;
	enum test_result (*testfn)(struct test_config *cfg);
};

/** Simple rdtsc-based profiler state (same pattern used by other tests). */
struct rdtsc_prof {
	uint64_t clk_start;
	uint64_t clk_min;               /**< min clocks */
	uint64_t clk_max;               /**< max clocks */
	uint64_t clk_avgc;              /**< count to calc average */
	double clk_avg;                 /**< cumulative sum to calc average */
	const char *name;
};
+
/* Byte time of a simulated 10 Gbit/s port: bytes transferred per second. */
static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
static double inv_cycles_per_byte;

/* Pre-compute the cycles-to-byte-time scaling factor used by get_port_ts(). */
static void init_port_ts(uint64_t cpu_clock)
{
	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
	inv_cycles_per_byte = 1.0 / cycles_per_byte;
}

/* Current time expressed in "byte times" of the simulated port. */
static uint64_t get_port_ts(void)
{
	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
/* Record the profiling start timestamp (serializing rdtsc variant). */
static inline void rdtsc_prof_start(struct rdtsc_prof *p)
{
	p->clk_start = rte_rdtsc_precise();
}

/* Fold the elapsed cycles since rdtsc_prof_start() into the stats. */
static inline void rdtsc_prof_end(struct rdtsc_prof *p)
{
	/* NOTE(review): despite its name, 'clk_start' here holds the elapsed
	 * delta, not a timestamp; the non-serializing rte_rdtsc() is used on
	 * the end side — presumably mirroring the pattern of test_red.c.
	 */
	uint64_t clk_start = rte_rdtsc() - p->clk_start;

	p->clk_avgc++;
	p->clk_avg += (double) clk_start;

	if (clk_start > p->clk_max)
		p->clk_max = clk_start;
	if (clk_start < p->clk_min)
		p->clk_min = clk_start;
}
+
/* Print min/max/average cycle counts; silent when no samples recorded. */
static void rdtsc_prof_print(struct rdtsc_prof *p)
{
	if (p->clk_avgc > 0) {
		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
						",max=%" PRIu64 ", avg=%.1f\n",
			p->name,
			p->clk_avgc,
			p->clk_min,
			p->clk_max,
			(p->clk_avg / ((double) p->clk_avgc)));
	}
}
+
/* Return the PIE active/inactive flag; pie_cfg is accepted for API
 * symmetry only and intentionally unused.
 */
static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
				    struct rte_pie *pie)
{
	RTE_SET_USED(pie_cfg);
	return pie->active;
}
+
/* Set the PIE active/inactive flag; pie_cfg is accepted for API
 * symmetry only and intentionally unused.
 */
static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
					struct rte_pie *pie,
					uint16_t active)
{
	RTE_SET_USED(pie_cfg);
	pie->active = active;
}
+
/**
 * Read the current packet drop probability from the PIE runtime data.
 */
static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
				    struct rte_pie *pie)
{
	/* pie_cfg kept for API symmetry, intentionally unused */
	RTE_SET_USED(pie_cfg);
	return pie->drop_prob;
}
+
/* Read the average dequeue time from the PIE runtime data.
 * (Previous comment said "drop probability" — copy-paste error.)
 */
static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
				    struct rte_pie *pie)
{
	/* pie_cfg kept for API symmetry, intentionally unused */
	RTE_SET_USED(pie_cfg);
	return pie->avg_dq_time;
}
+
/**
 * Fraction of operations dropped: dropped / (enqueued + dropped).
 *
 * Fix: guard the zero-total case — the original divided by zero when
 * no packet had been processed yet (undefined/NaN result).
 */
static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
{
	uint32_t total = enqueued + dropped;

	if (total == 0)
		return 0.0;
	return (double)dropped / (double)total;
}
+
/**
 * Check whether the measured drop rate matches the drop probability
 * within the given percentage tolerance.
 *
 * Fix: the original tested '(int)abs_diff == 0', which truncates the
 * difference to an integer. Drop rates/probabilities lie in [0, 1],
 * so any mismatch below 1.0 — i.e. virtually every mismatch — was
 * silently accepted regardless of tolerance. Compare the double
 * directly instead.
 *
 * @param diff [out] relative difference in percent of drop_prob
 * @return 1 when within tolerance, 0 otherwise
 *         (drop_prob == 0 with a non-zero difference yields +inf and
 *         therefore fails, which is the intended outcome)
 */
static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
							double tolerance)
{
	double abs_diff = fabs(drop_rate - drop_prob);
	int ret = 1;

	if (abs_diff == 0.0) {
		*diff = 0.0;
	} else {
		*diff = (abs_diff / drop_prob) * 100.0;
		if (*diff > tolerance)
			ret = 0;
	}
	return ret;
}
+
/**
 * Initialize every PIE config under test and zero the shared counters.
 *
 * @return PASS on success, FAIL when rte_pie_config_init() rejects
 *         any of the supplied parameter sets
 */
static enum test_result
test_rte_pie_init(struct test_config *tcfg)
{
	unsigned int i = 0;

	tcfg->tvar->clk_freq = rte_get_timer_hz();
	init_port_ts(tcfg->tvar->clk_freq);

	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
					(uint16_t)tcfg->tconfig->qdelay_ref,
					(uint16_t)tcfg->tconfig->dp_update_interval[i],
					(uint16_t)tcfg->tconfig->max_burst[i],
					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
			return FAIL;
		}
	}

	/* counters are shared via pointers; only slot 0 is reset here */
	*tcfg->tqueue->qlen = 0;
	*tcfg->tvar->dropped = 0;
	*tcfg->tvar->enqueued = 0;

	return PASS;
}
+
/**
 * Ramp up the queue: keep enqueuing until rte_pie_enqueue() accepts a
 * packet (target level reached) or the attempt budget runs out.
 *
 * @return 0 as soon as one enqueue succeeds, -1 when all attempts fail
 */
static int
increase_qsize(struct rte_pie_config *pie_cfg,
				struct rte_pie *pie,
				uint32_t *qlen,
				uint32_t pkt_len,
				uint32_t attempts)
{
	uint32_t attempt;

	for (attempt = 0; attempt < attempts; attempt++) {
		if (rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len,
				    get_port_ts()) == 0)
			return 0;
	}

	/* budget exhausted without a successful enqueue */
	return -1;
}
+
/**
 * Functional-test helper: perform num_ops enqueues of a fixed-size
 * packet and tally how many were accepted vs. dropped.
 */
static void
enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
					struct rte_pie *pie,
					uint32_t *qlen,
					uint32_t num_ops,
					uint32_t *enqueued,
					uint32_t *dropped)
{
	uint32_t op;

	for (op = 0; op < num_ops; op++) {
		int rc = rte_pie_enqueue(pie_cfg, pie, *qlen,
					 sizeof(uint32_t), get_port_ts());

		if (rc == 0)
			(*enqueued)++;
		else
			(*dropped)++;
	}
}
+
/**
 * Default values for the functional test structures.
 */
static struct rte_pie_config ft_wpconfig[1];
static struct rte_pie ft_rtdata[1];
static uint32_t  ft_q[] = {0};        /* shared queue-length slot */
static uint32_t  ft_dropped[] = {0};  /* shared drop counter */
static uint32_t  ft_enqueued[] = {0}; /* shared enqueue counter */
static uint16_t ft_max_burst[] = {64};
static uint16_t ft_dp_update_interval[] = {150};

static struct test_rte_pie_config ft_tconfig =  {
	.pconfig = ft_wpconfig,
	.num_cfg = RTE_DIM(ft_wpconfig),
	.qdelay_ref = 15,
	.dp_update_interval = ft_dp_update_interval,
	.max_burst = ft_max_burst,
	.tailq_th = 15,
};

static struct test_queue ft_tqueue = {
	.pdata_in = ft_rtdata,
	.num_queues = RTE_DIM(ft_rtdata),
	.qlen = ft_q,
	.q_ramp_up = 10,
	.drop_tolerance = 0, /* functional tests expect zero drops */
};

static struct test_var ft_tvar = {
	.num_iterations = 0,
	.num_ops = 10000,
	.clk_freq = 0, /* filled in by test_rte_pie_init() */
	.dropped = ft_dropped,
	.enqueued = ft_enqueued,
};
+
/**
 * Test F1: functional test 1 — queue target levels to step through.
 */
static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};

/* Configuration for func_test1: one PIE config, shared ft_* state. */
static struct test_config func_test_config1 = {
	.ifname = "functional test interface",
	.msg = "functional test : use one pie configuration\n\n",
	.htxt = "                "
	"drop probability "
	"enqueued    "
	"dropped     "
	"drop prob % "
	"drop rate % "
	"diff %      "
	"tolerance % "
	"active  "
	"\n",
	.tconfig = &ft_tconfig,
	.tqueue = &ft_tqueue,
	.tvar = &ft_tvar,
	.tlevel = ft_tlevels,
};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
/**
 * Test F2: functional test 2 — one target level, ten PIE parameter sets
 * varying max_burst and dp_update_interval pairwise.
 */
static uint32_t ft2_tlevel[] = {127};
static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
static uint16_t ft2_dp_update_interval[] = {
				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
static struct rte_pie_config ft2_pconfig[10];

static struct test_rte_pie_config ft2_tconfig =  {
	.pconfig = ft2_pconfig,
	.num_cfg = RTE_DIM(ft2_pconfig),
	.qdelay_ref = 15,
	.dp_update_interval = ft2_dp_update_interval,
	.max_burst = ft2_max_burst,
	.tailq_th = 15,
};

/* Reuses the shared ft_tqueue/ft_tvar state from functional test 1. */
static struct test_config func_test_config2 = {
	.ifname = "functional test 2 interface",
	.msg = "functional test 2 : use several PIE configurations,\n"
	"		    compare drop rate to drop probability\n\n",
	.htxt = "PIE config     "
	"avg queue size "
	"enqueued       "
	"dropped        "
	"drop prob %    "
	"drop rate %    "
	"diff %         "
	"tolerance %    "
	"\n",
	.tconfig = &ft2_tconfig,
	.tqueue = &ft_tqueue,
	.tvar = &ft_tvar,
	.tlevel = ft2_tlevel,
};
+
/**
 * Functional test 2: for each of the ten PIE configurations, ramp the
 * queue to the single target level, run enqueue batches, and check the
 * measured drop rate against the reported drop probability within
 * drop_tolerance.
 */
static enum test_result func_test2(struct test_config *tcfg)
{
	enum test_result result = PASS;
	uint32_t i = 0;

	printf("%s", tcfg->msg);

	printf("%s", tcfg->htxt);

	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
		/* NOTE(review): avg is uint32_t but the getter returns
		 * double — the fractional part is truncated for display.
		 */
		uint32_t avg = 0;
		double drop_rate = 0.0;
		double drop_prob = 0.0;
		double diff = 0.0;

		if (test_rte_pie_init(tcfg) != PASS) {
			result = FAIL;
			goto out;
		}

		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
		*tcfg->tvar->enqueued = 0;
		*tcfg->tvar->dropped = 0;

		if (increase_qsize(&tcfg->tconfig->pconfig[i],
					tcfg->tqueue->pdata_in,
					tcfg->tqueue->qlen,
					*tcfg->tlevel,
					tcfg->tqueue->q_ramp_up) != 0) {
			result = FAIL;
			goto out;
		}

		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
				     tcfg->tqueue->pdata_in,
				     tcfg->tqueue->qlen,
				     tcfg->tvar->num_ops,
				     tcfg->tvar->enqueued,
				     tcfg->tvar->dropped);

		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);

		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
							*tcfg->tvar->dropped);
		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);

		if (!check_drop_rate(&diff, drop_rate, drop_prob,
				 (double)tcfg->tqueue->drop_tolerance)) {
			fprintf(stderr, "Fail: drop rate outside tolerance\n");
			result = FAIL;
		}

		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
				drop_prob * 100.0, drop_rate * 100.0, diff,
				(double)tcfg->tqueue->drop_tolerance);
	}
out:
	return result;
}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
/**
 * Default values for the performance test structures.
 */
static struct rte_pie_config pt_wrconfig[1];
static struct rte_pie pt_rtdata[1];
static struct rte_pie pt_wtdata[1];   /* dequeue-side runtime data for perf test 2 */
static uint32_t pt_q[] = {0};
static uint32_t pt_dropped[] = {0};
static uint32_t pt_enqueued[] = {0};
static uint32_t pt_dequeued[] = {0};
static uint16_t pt_max_burst[] = {64};
static uint16_t pt_dp_update_interval[] = {150};

static struct test_rte_pie_config pt_tconfig =  {
	.pconfig = pt_wrconfig,
	.num_cfg = RTE_DIM(pt_wrconfig),
	.qdelay_ref = 15,
	.dp_update_interval = pt_dp_update_interval,
	.max_burst = pt_max_burst,
	.tailq_th = 150,
};

static struct test_queue pt_tqueue = {
	.pdata_in = pt_rtdata,
	.num_queues = RTE_DIM(pt_rtdata),
	.qlen = pt_q,
	.q_ramp_up = 1000000,
	.drop_tolerance = 0,  /* 0 percent */
};

/* NOTE(review): identical to pt_tconfig above — kept separate,
 * presumably so perf test 2 can diverge later.
 */
static struct test_rte_pie_config pt_tconfig2 =  {
	.pconfig = pt_wrconfig,
	.num_cfg = RTE_DIM(pt_wrconfig),
	.qdelay_ref = 15,
	.dp_update_interval = pt_dp_update_interval,
	.max_burst = pt_max_burst,
	.tailq_th = 150,
};

static struct test_queue pt_tqueue2 = {
	.pdata_in = pt_rtdata,
	.pdata_out = pt_wtdata,
	.num_queues = RTE_DIM(pt_rtdata),
	.qlen = pt_q,
	.q_ramp_up = 1000000,
	.drop_tolerance = 0,  /* 0 percent */
};
+
/**
 * Performance-test helper: time num_ops enqueue (and optionally
 * dequeue) operations, accumulating cycle counts in prof.
 * aka
 *  rte_sched_port_enqueue(port, in_mbufs, 10);
 *	rte_sched_port_dequeue(port, out_mbufs, 10);
 *
 * pie_out may be NULL, in which case only enqueue is measured.
 */
static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
				 struct rte_pie *pie_in,
				 struct rte_pie *pie_out,
				 uint32_t *qlen,
				 uint32_t num_ops,
				 uint32_t *enqueued,
				 uint32_t *dropped,
				 uint32_t *dequeued,
				 struct rdtsc_prof *prof)
{
	uint32_t i = 0;

	if (pie_cfg == NULL) {
		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
		return;
	}

	if (pie_in == NULL) {
		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
		return;
	}

	for (i = 0; i < num_ops; i++) {
		uint64_t ts = 0;
		int ret = 0;

		/* timed enqueue of a 4000-byte packet */
		ts = get_port_ts();
		rdtsc_prof_start(prof);
		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
								1000*sizeof(uint32_t), ts);
		rdtsc_prof_end(prof);

		if (ret == 0)
			(*enqueued)++;
		else
			(*dropped)++;

		/* timed dequeue, folded into the same profiler */
		if (pie_out != NULL) {
			ts = get_port_ts();
			rdtsc_prof_start(prof);
			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
			rdtsc_prof_end(prof);

			(*dequeued)++;
		}
	}
}
+
/**
 * Setup test structures for test P1 (performance test 1).
 */
static uint32_t pt1_tlevel[] = {80}; /* NOTE(review): set but never read by perf_test */

static struct test_var perf1_tvar = {
	.num_iterations = 0,
	.num_ops = 30000,
	.clk_freq = 0,
	.dropped = pt_dropped,
	.enqueued = pt_enqueued
};

static struct test_config perf_test_config = {
	.ifname = "performance test 1 interface",
	.msg = "performance test 1 : use one PIE configuration,\n"
	"		     measure enqueue performance\n\n",
	.tconfig = &pt_tconfig,
	.tqueue = &pt_tqueue,
	.tvar = &perf1_tvar,
	.tlevel = pt1_tlevel,
};
+
/**
 * Performance test function to measure enqueue performance.
 * Runs num_ops timed enqueues and prints cycle statistics plus the
 * enqueue/drop split.
 */
static enum test_result perf_test(struct test_config *tcfg)
{
	enum test_result result = PASS;
	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
	uint32_t total = 0;

	printf("%s", tcfg->msg);

	rdtsc_prof_init(&prof, "enqueue");

	if (test_rte_pie_init(tcfg) != PASS) {
		result = FAIL;
		goto out;
	}

	/* initialize the rte_pie run time data structure and counters */
	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
	*tcfg->tvar->enqueued = 0;
	*tcfg->tvar->dropped = 0;

	/* pie_out == NULL: enqueue-only measurement */
	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
			     tcfg->tqueue->pdata_in,
				 NULL,
			     tcfg->tqueue->qlen,
			     tcfg->tvar->num_ops,
			     tcfg->tvar->enqueued,
			     tcfg->tvar->dropped,
				 tcfg->tvar->dequeued,
			     &prof);

	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;

	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
			total, *tcfg->tvar->enqueued,
			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
			*tcfg->tvar->dropped,
			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);

	rdtsc_prof_print(&prof);
out:
	return result;
}
+
+
+
/**
 * Setup test structures for test P2 (performance test 2).
 */
static uint32_t pt2_tlevel[] = {80}; /* NOTE(review): set but never read by perf_test2 */

static struct test_var perf2_tvar = {
	.num_iterations = 0,
	.num_ops = 30000,
	.clk_freq = 0,
	.dropped = pt_dropped,
	.enqueued = pt_enqueued,
	.dequeued = pt_dequeued
};

static struct test_config perf_test_config2 = {
	.ifname = "performance test 2 interface",
	.msg = "performance test 2 : use one PIE configuration,\n"
	"		     measure enqueue & dequeue performance\n\n",
	.tconfig = &pt_tconfig2,
	.tqueue = &pt_tqueue2,
	.tvar = &perf2_tvar,
	.tlevel = pt2_tlevel,
};
+
/**
 * Performance test function to measure enqueue & dequeue performance.
 * Same as perf_test but with a dequeue-side rte_pie, so both paths are
 * folded into the profiler.
 */
static enum test_result perf_test2(struct test_config *tcfg)
{
	enum test_result result = PASS;
	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
	uint32_t total = 0;

	printf("%s", tcfg->msg);

	rdtsc_prof_init(&prof, "enqueue");

	if (test_rte_pie_init(tcfg) != PASS) {
		result = FAIL;
		goto out;
	}

	/* initialize the rte_pie run time data structure and counters */
	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
	*tcfg->tvar->enqueued = 0;
	*tcfg->tvar->dequeued = 0;
	*tcfg->tvar->dropped = 0;

	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
				 tcfg->tqueue->pdata_in,
				 tcfg->tqueue->pdata_out,
				 tcfg->tqueue->qlen,
				 tcfg->tvar->num_ops,
				 tcfg->tvar->enqueued,
				 tcfg->tvar->dropped,
				 tcfg->tvar->dequeued,
				 &prof);

	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;

	/* NOTE(review): percentages relate dequeued to the enqueue-side
	 * total (enqueued + dropped), matching the original output.
	 */
	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
			total, *tcfg->tvar->dequeued,
			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
			*tcfg->tvar->dropped,
			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);

	rdtsc_prof_print(&prof);
out:
	return result;
}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
/* Print the pass/fail summary; the fail column is shown only when at
 * least one test did not pass.
 */
static void
show_stats(const uint32_t num_tests, const uint32_t num_pass)
{
	const uint32_t num_fail = num_tests - num_pass;

	if (num_fail == 0) {
		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
		return;
	}

	printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
	       num_fail);
}
+
/* Map the pass count to a process-style exit code: 0 when every test
 * passed, 1 otherwise.
 */
static int
tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
{
	if (num_pass == num_tests)
		return 0;

	return 1;
}
+
/* "pie_autotest": invalid-parameter checks plus the quick functional
 * tests.
 */
static int
test_pie(void)
{
	uint32_t num_tests = 0;
	uint32_t num_pass = 0;

	if (test_invalid_parameters() < 0)
		return -1;

	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
		  &num_tests, &num_pass);
	show_stats(num_tests, num_pass);
	return tell_the_result(num_tests, num_pass);
}
+
/* "pie_perf": performance tests only. */
static int
test_pie_perf(void)
{
	uint32_t num_tests = 0;
	uint32_t num_pass = 0;

	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
	show_stats(num_tests, num_pass);
	return tell_the_result(num_tests, num_pass);
}
+
/* "pie_all": invalid-parameter checks plus every functional and
 * performance test.
 */
static int
test_pie_all(void)
{
	uint32_t num_tests = 0;
	uint32_t num_pass = 0;

	if (test_invalid_parameters() < 0)
		return -1;

	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
	show_stats(num_tests, num_pass);
	return tell_the_result(num_tests, num_pass);
}

/* Register the dpdk-test commands exercised by app/test/autotest_data.py. */
REGISTER_TEST_COMMAND(pie_autotest, test_pie);
REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index 320435ed91..480b6e531d 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -1877,7 +1877,7 @@ rte_sched_port_aqm_drop(struct rte_sched_port *port,
 	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
 	struct rte_pie *pie = &qe->pie;
 
-	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port->time_cpu_cycles);
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v9 1/5] sched: add PIE based congestion management
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-12 15:59                     ` Dumitrescu, Cristian
  2021-10-12 18:34                       ` Liguzinski, WojciechX
  0 siblings, 1 reply; 178+ messages in thread
From: Dumitrescu, Cristian @ 2021-10-12 15:59 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Singh, Jasvinder; +Cc: Ajmera, Megha

Hi Wojciech,

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 11, 2021 8:56 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v9 1/5] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 228 +++++++++----
>  lib/sched/rte_sched.h                    |  53 ++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 685 insertions(+), 90 deletions(-)
>  create mode 100644 lib/sched/rte_pie.c
>  create mode 100644 lib/sched/rte_pie.h
> 
> diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c
> b/drivers/net/softnic/rte_eth_softnic_tm.c
> index 90baba15ce..5b6c4e6d4b 100644
> --- a/drivers/net/softnic/rte_eth_softnic_tm.c
> +++ b/drivers/net/softnic/rte_eth_softnic_tm.c
> @@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
>  	return 0;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
>  #define WRED_SUPPORTED						1
>  #else
>  #define WRED_SUPPORTED						0
> @@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev,
> uint32_t tc_id)
>  	return NULL;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static void
>  wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
> @@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev,
> uint32_t subport_id)
>  	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE;
> tc_id++)
>  		for (color = RTE_COLOR_GREEN; color < RTE_COLORS;
> color++) {
>  			struct rte_red_params *dst =
> -				&pp->red_params[tc_id][color];
> +				&pp->wred_params[tc_id][color];
>  			struct tm_wred_profile *src_wp =
>  				tm_tc_wred_profile_get(dev, tc_id);
>  			struct rte_tm_red_params *src =
> diff --git a/lib/sched/meson.build b/lib/sched/meson.build
> index b24f7b8775..e7ae9bcf19 100644
> --- a/lib/sched/meson.build
> +++ b/lib/sched/meson.build
> @@ -1,11 +1,7 @@
>  # SPDX-License-Identifier: BSD-3-Clause
>  # Copyright(c) 2017 Intel Corporation
> 
> -sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
> -headers = files(
> -        'rte_approx.h',
> -        'rte_red.h',
> -        'rte_sched.h',
> -        'rte_sched_common.h',
> -)
> +sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
> +headers = files('rte_sched.h', 'rte_sched_common.h',
> +		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
>  deps += ['mbuf', 'meter']
> diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
> new file mode 100644
> index 0000000000..2fcecb2db4
> --- /dev/null
> +++ b/lib/sched/rte_pie.c
> @@ -0,0 +1,82 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#include <stdlib.h>
> +
> +#include "rte_pie.h"
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_malloc.h>
> +
> +#ifdef __INTEL_COMPILER
> +#pragma warning(disable:2259) /* conversion may lose significant bits */
> +#endif
> +
> +void
> +rte_pie_rt_data_init(struct rte_pie *pie)
> +{
> +	if (pie == NULL) {
> +		/* Allocate memory to use the PIE data structure */
> +		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
> +
> +		if (pie == NULL)
> +			RTE_LOG(ERR, SCHED, "%s: Memory allocation
> fails\n", __func__);
> +	}
> +
> +	pie->active = 0;
> +	pie->in_measurement = 0;
> +	pie->departed_bytes_count = 0;
> +	pie->start_measurement = 0;
> +	pie->last_measurement = 0;
> +	pie->qlen = 0;
> +	pie->avg_dq_time = 0;
> +	pie->burst_allowance = 0;
> +	pie->qdelay_old = 0;
> +	pie->drop_prob = 0;
> +	pie->accu_prob = 0;
> +}
> +
> +int
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th)
> +{
> +	uint64_t tsc_hz = rte_get_tsc_hz();
> +
> +	if (pie_cfg == NULL)
> +		return -1;
> +
> +	if (qdelay_ref <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for qdelay_ref\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (dp_update_interval <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for dp_update_interval\n",
> __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (max_burst <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for max_burst\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (tailq_th <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for tailq_th\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
> +	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) /
> 1000;
> +	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
> +	pie_cfg->tailq_th = tailq_th;
> +
> +	return 0;
> +}
> diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
> new file mode 100644
> index 0000000000..f83c95664f
> --- /dev/null
> +++ b/lib/sched/rte_pie.h
> @@ -0,0 +1,393 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef __RTE_PIE_H_INCLUDED__
> +#define __RTE_PIE_H_INCLUDED__
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * @file
> + * RTE Proportional Integral controller Enhanced (PIE)
> + *
> + *
> + ***/
> +
> +#include <stdint.h>
> +
> +#include <rte_random.h>
> +#include <rte_debug.h>
> +
> +#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
> +				     * to start measurement cycle (bytes)
> +				     */
> +#define RTE_DQ_WEIGHT      0.25    /**< Weight
> (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
> +#define RTE_ALPHA          0.125   /**< Weights in drop probability
> calculations */
> +#define RTE_BETA           1.25    /**< Weights in drop probability calculations
> */
> +#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number
> */
> +
> +
> +/**
> + * PIE configuration parameters passed by user
> + *
> + */
> +struct rte_pie_params {
> +	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
> +	uint16_t dp_update_interval;   /**< Update interval for drop
> probability (milliseconds) */
> +	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * PIE configuration parameters
> + *
> + */
> +struct rte_pie_config {
> +	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
> +	uint64_t dp_update_interval;   /**< Update interval for drop
> probability (in CPU cycles) */
> +	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * RED run-time data
> + */
> +struct rte_pie {
> +	uint16_t active;               /**< Flag for activating/deactivating pie */
> +	uint16_t in_measurement;       /**< Flag for activation of
> measurement cycle */
> +	uint32_t departed_bytes_count; /**< Number of bytes departed in
> current measurement cycle */
> +	uint64_t start_measurement;    /**< Time to start to measurement
> cycle (in cpu cycles) */
> +	uint64_t last_measurement;     /**< Time of last measurement (in
> cpu cycles) */
> +	uint64_t qlen;                 /**< Queue length (packets count) */
> +	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
> +	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in
> cpu cycles) */
> +	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
> +	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
> +	double drop_prob;              /**< Current packet drop probability */
> +	double accu_prob;              /**< Accumulated packet drop probability
> */
> +};
> +
> +/**
> + * @brief Initialises run-time data
> + *
> + * @param pie [in,out] data pointer to PIE runtime data
> + */
> +void
> +__rte_experimental
> +rte_pie_rt_data_init(struct rte_pie *pie);
> +
> +/**
> + * @brief Configures a single PIE configuration parameter structure.
> + *
> + * @param pie_cfg [in,out] config pointer to a PIE configuration parameter
> structure
> + * @param qdelay_ref [in]  latency target(milliseconds)
> + * @param dp_update_interval [in] update interval for drop probability
> (milliseconds)
> + * @param max_burst [in] maximum burst allowance (milliseconds)
> + * @param tailq_th [in] tail drop threshold for the queue (number of
> packets)
> + *
> + * @return Operation status
> + * @retval 0 success
> + * @retval !0 error
> + */
> +int
> +__rte_experimental
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th);
> +
> +/**
> + * @brief Decides packet enqueue when queue is empty
> + *
> + * Note: packet is never dropped in this particular case.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval !0 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len)
> +{
> +	RTE_ASSERT(pkt_len != NULL);
> +
> +	/* Update the PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/**
> +	 * If the queue has been idle for a while, turn off PIE and Reset
> counters
> +	 */
> +	if ((pie->active == 1) &&
> +		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param time [in] current time (measured in cpu cycles)
> + */
> +static inline void
> +__rte_experimental
> +_calc_drop_probability(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie, uint64_t time)
> +{
> +	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
> +
> +	/* Note: can be implemented using integer multiply.
> +	 * DQ_THRESHOLD is power of 2 value.
> +	 */
> +	double current_qdelay = pie->qlen * (pie->avg_dq_time /
> RTE_DQ_THRESHOLD);
> +
> +	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
> +		RTE_BETA * (current_qdelay - pie->qdelay_old);
> +
> +	if (pie->drop_prob < 0.000001)
> +		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
> +	else if (pie->drop_prob < 0.00001)
> +		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
> +	else if (pie->drop_prob < 0.0001)
> +		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
> +	else if (pie->drop_prob < 0.001)
> +		p = p * 0.03125;                    /* (1/32) = 0.03125   */
> +	else if (pie->drop_prob < 0.01)
> +		p = p * 0.125;                      /* (1/8) = 0.125    */
> +	else if (pie->drop_prob < 0.1)
> +		p = p * 0.5;                        /* (1/2) = 0.5    */
> +
> +	if (pie->drop_prob >= 0.1 && p > 0.02)
> +		p = 0.02;
> +
> +	pie->drop_prob += p;
> +
> +	double qdelay = qdelay_ref * 0.5;
> +
> +	/*  Exponentially decay drop prob when congestion goes away  */
> +	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
> +		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
> +
> +	/* Bound drop probability */
> +	if (pie->drop_prob < 0)
> +		pie->drop_prob = 0;
> +	if (pie->drop_prob > 1)
> +		pie->drop_prob = 1;
> +
> +	pie->qdelay_old = current_qdelay;
> +	pie->last_measurement = time;
> +
> +	uint64_t burst_allowance = pie->burst_allowance - pie_cfg-
> >dp_update_interval;
> +
> +	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
> +}
> +
> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + *
> + * @return operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +_rte_pie_drop(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie)
> +{
> +	uint64_t rand_value;
> +	double qdelay = pie_cfg->qdelay_ref * 0.5;
> +
> +	/* PIE is active but the queue is not congested: return 0 */
> +	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
> +		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
> +		return 0;
> +
> +	if (pie->drop_prob == 0)
> +		pie->accu_prob = 0;
> +
> +	/* For practical reasons, drop probability can be further scaled
> according
> +	 * to packet size, but one needs to set a bound to avoid unnecessary
> bias
> +	 * Random drop
> +	 */
> +	pie->accu_prob += pie->drop_prob;
> +
> +	if (pie->accu_prob < 0.85)
> +		return 0;
> +
> +	if (pie->accu_prob >= 8.5)
> +		return 1;
> +
> +	rand_value = rte_rand()/RTE_RAND_MAX;
> +
> +	if ((double)rand_value < pie->drop_prob) {
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqeued or dropped for non-
> empty queue
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on max threshold criterion
> + * @retval 2 drop the packet based on mark probability criterion
> + */
> +static inline int
> +__rte_experimental
> +rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	/* Check queue space against the tail drop threshold */
> +	if (pie->qlen >= pie_cfg->tailq_th) {
> +
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	if (pie->active) {
> +		/* Update drop probability after certain interval */
> +		if ((time - pie->last_measurement) >= pie_cfg-
> >dp_update_interval)
> +			_calc_drop_probability(pie_cfg, pie, time);
> +
> +		/* Decide whether packet to be dropped or enqueued */
> +		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance ==
> 0)
> +			return 2;
> +	}
> +
> +	/* When queue occupancy is over a certain threshold, turn on PIE */
> +	if ((pie->active == 0) &&
> +		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
> +		pie->active = 1;
> +		pie->qdelay_old = 0;
> +		pie->drop_prob = 0;
> +		pie->in_measurement = 1;
> +		pie->departed_bytes_count = 0;
> +		pie->avg_dq_time = 0;
> +		pie->last_measurement = time;
> +		pie->burst_allowance = pie_cfg->max_burst;
> +		pie->accu_prob = 0;
> +		pie->start_measurement = time;
> +	}
> +
> +	/* when queue has been idle for a while, turn off PIE and Reset
> counters */
> +	if (pie->active == 1 &&
> +		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	/* Update PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqeued or dropped
> + * Updates run time data and gives verdict whether to enqueue or drop the
> packet.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param qlen [in] queue length
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on drop probility criteria
> + */
> +static inline int
> +__rte_experimental
> +rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	const unsigned int qlen,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	RTE_ASSERT(pie_cfg != NULL);
> +	RTE_ASSERT(pie != NULL);
> +
> +	if (qlen != 0)
> +		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len,
> time);
> +	else
> +		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
> +}
> +
> +/**
> + * @brief PIE rate estimation method
> + * Called on each packet departure.
> + *
> + * @param pie [in] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp in cpu cycles
> + */
> +static inline void
> +__rte_experimental
> +rte_pie_dequeue(struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	uint64_t time)
> +{
> +	/* Dequeue rate estimation */
> +	if (pie->in_measurement) {
> +		pie->departed_bytes_count += pkt_len;
> +
> +		/* Start a new measurement cycle when enough packets */
> +		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
> +			uint64_t dq_time = time - pie->start_measurement;
> +
> +			if (pie->avg_dq_time == 0)
> +				pie->avg_dq_time = dq_time;
> +			else
> +				pie->avg_dq_time = dq_time *
> RTE_DQ_WEIGHT + pie->avg_dq_time
> +					* (1 - RTE_DQ_WEIGHT);
> +
> +			pie->in_measurement = 0;
> +		}
> +	}
> +
> +	/* Start measurement cycle when enough data in the queue */
> +	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie-
> >in_measurement == 0)) {
> +		pie->in_measurement = 1;
> +		pie->start_measurement = time;
> +		pie->departed_bytes_count = 0;
> +	}
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* __RTE_PIE_H_INCLUDED__ */
> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
> index a858f61f95..320435ed91 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c
> @@ -89,8 +89,12 @@ struct rte_sched_queue {
> 
>  struct rte_sched_queue_extra {
>  	struct rte_sched_queue_stats stats;
> -#ifdef RTE_SCHED_RED
> -	struct rte_red red;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red red;
> +		struct rte_pie pie;
> +	};
>  #endif
>  };
> 
> @@ -183,8 +187,13 @@ struct rte_sched_subport {
>  	/* Pipe queues size */
>  	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +	enum rte_sched_aqm_mode aqm;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red_config
> wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		struct rte_pie_config
> pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
> 
>  	/* Scheduling loop detection */
> @@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port
> *port, uint32_t n_subports)
>  	rte_free(port);
>  }
> 
> +#ifdef RTE_SCHED_AQM
> +
> +static int
> +rte_sched_red_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +
> +		uint32_t j;
> +
> +		for (j = 0; j < RTE_COLORS; j++) {
> +			/* if min/max are both zero, then RED is disabled */
> +			if ((params->wred_params[i][j].min_th |
> +				 params->wred_params[i][j].max_th) == 0) {
> +				continue;
> +			}
> +
> +			if (rte_red_config_init(&s->wred_config[i][j],
> +				params->wred_params[i][j].wq_log2,
> +				params->wred_params[i][j].min_th,
> +				params->wred_params[i][j].max_th,
> +				params->wred_params[i][j].maxp_inv) != 0) {
> +				rte_sched_free_memory(port, n_subports);
> +
> +				RTE_LOG(NOTICE, SCHED,
> +				"%s: RED configuration init fails\n",
> __func__);
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	s->aqm = RTE_SCHED_AQM_WRED;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_pie_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE tailq threshold incorrect\n", __func__);
> +			return -EINVAL;
> +		}
> +
> +		if (rte_pie_config_init(&s->pie_config[i],
> +			params->pie_params[i].qdelay_ref,
> +			params->pie_params[i].dp_update_interval,
> +			params->pie_params[i].max_burst,
> +			params->pie_params[i].tailq_th) != 0) {
> +			rte_sched_free_memory(port, n_subports);
> +
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE configuration init fails\n", __func__);
> +			return -EINVAL;
> +			}
> +	}
> +	s->aqm = RTE_SCHED_AQM_PIE;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_aqm_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	if (params->aqm == RTE_SCHED_AQM_WRED)
> +		return rte_sched_red_config(port, s, params, n_subports);
> +
> +	else if (params->aqm == RTE_SCHED_AQM_PIE)
> +		return rte_sched_pie_config(port, s, params, n_subports);
> +
> +	return -EINVAL;
> +}
> +#endif
> +
>  int
>  rte_sched_subport_config(struct rte_sched_port *port,
>  	uint32_t subport_id,
> @@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct
> rte_sched_port *port,
>  		s->n_pipe_profiles = params->n_pipe_profiles;
>  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -			uint32_t j;
> -
> -			for (j = 0; j < RTE_COLORS; j++) {
> -			/* if min/max are both zero, then RED is disabled */
> -				if ((params->red_params[i][j].min_th |
> -				     params->red_params[i][j].max_th) == 0) {
> -					continue;
> -				}
> -
> -				if (rte_red_config_init(&s->red_config[i][j],
> -				    params->red_params[i][j].wq_log2,
> -				    params->red_params[i][j].min_th,
> -				    params->red_params[i][j].max_th,
> -				    params->red_params[i][j].maxp_inv) != 0)
> {
> -					RTE_LOG(NOTICE, SCHED,
> -					"%s: RED configuration init fails\n",
> -					__func__);
> -					ret = -EINVAL;
> -					goto out;
> -				}
> -			}
> +#ifdef RTE_SCHED_AQM
> +		status = rte_sched_aqm_config(port, s, params,
> n_subports);
> +		if (status) {
> +			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration
> fails\n", __func__);
> +			return status;
>  		}
>  #endif
> 
> @@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct
> rte_sched_port *port,
>  	subport->stats.n_bytes_tc[tc_index] += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
> -	struct rte_sched_subport *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
>  	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
> -#ifdef RTE_SCHED_RED
> -	subport->stats.n_pkts_red_dropped[tc_index] += red;
> +#ifdef RTE_SCHED_AQM
> +	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
>  #endif
>  }
> 
> @@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct
> rte_sched_subport *subport,
>  	qe->stats.n_bytes += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	qe->stats.n_pkts_dropped += 1;
>  	qe->stats.n_bytes_dropped += pkt_len;
> -#ifdef RTE_SCHED_RED
> -	qe->stats.n_pkts_red_dropped += red;
> +#ifdef RTE_SCHED_AQM
> +	qe->stats.n_pkts_aqm_dropped += drops;
>  #endif
>  }
> 
>  #endif /* RTE_SCHED_COLLECT_STATS */
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static inline int
> -rte_sched_port_red_drop(struct rte_sched_port *port,
> +rte_sched_port_aqm_drop(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport,
>  	struct rte_mbuf *pkt,
>  	uint32_t qindex,
>  	uint16_t qlen)
>  {
>  	struct rte_sched_queue_extra *qe;
> -	struct rte_red_config *red_cfg;
> -	struct rte_red *red;
>  	uint32_t tc_index;
> -	enum rte_color color;
> 
>  	tc_index = rte_sched_port_pipe_tc(port, qindex);
> -	color = rte_sched_port_pkt_read_color(pkt);
> -	red_cfg = &subport->red_config[tc_index][color];
> +	qe = subport->queue_extra + qindex;
> 
> -	if ((red_cfg->min_th | red_cfg->max_th) == 0)
> -		return 0;
> +	/* WRED */
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red_config *red_cfg;
> +		struct rte_red *red;
> +		enum rte_color color;
> 
> -	qe = subport->queue_extra + qindex;
> -	red = &qe->red;
> +		color = rte_sched_port_pkt_read_color(pkt);
> +		red_cfg = &subport->wred_config[tc_index][color];
> +
> +		if ((red_cfg->min_th | red_cfg->max_th) == 0)
> +			return 0;
> 
> -	return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +		red = &qe->red;
> +
> +		return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +	}
> +
> +	/* PIE */
> +	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
> +	struct rte_pie *pie = &qe->pie;
> +
> +	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port-
> >time_cpu_cycles);
>  }
> 
>  static inline void
> @@ -1815,14 +1885,29 @@
> rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port
> *port,
>  	struct rte_sched_subport *subport, uint32_t qindex)
>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> -	struct rte_red *red = &qe->red;
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red *red = &qe->red;
> +
> +		rte_red_mark_queue_empty(red, port->time);
> +	}
> +}
> +
> +static inline void
> +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
> +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> +	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> +	struct rte_pie *pie = &qe->pie;
> 
> -	rte_red_mark_queue_empty(red, port->time);
> +	/* Update queue length */
> +	pie->qlen -= 1;
> +	pie->qlen_bytes -= pkt_len;
> +
> +	rte_pie_dequeue(pie, pkt_len, time);
>  }
> 
>  #else
> 
> -static inline int rte_sched_port_red_drop(struct rte_sched_port *port
> __rte_unused,
> +static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port
> __rte_unused,
>  	struct rte_sched_subport *subport __rte_unused,
>  	struct rte_mbuf *pkt __rte_unused,
>  	uint32_t qindex __rte_unused,
> @@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct
> rte_sched_port *port __rte_unus
> 
>  #define rte_sched_port_set_queue_empty_timestamp(port, subport,
> qindex)
> 
> -#endif /* RTE_SCHED_RED */
> +#endif /* RTE_SCHED_AQM */
> 
>  #ifdef RTE_SCHED_DEBUG
> 
> @@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct
> rte_sched_port *port,
>  	qlen = q->qw - q->qr;
> 
>  	/* Drop the packet (and update drop stats) when queue is full */
> -	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex,
> qlen) ||
> +	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex,
> qlen) ||
>  		     (qlen >= qsize))) {
>  		rte_pktmbuf_free(pkt);
>  #ifdef RTE_SCHED_COLLECT_STATS
> @@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,
>  {
>  	struct rte_sched_grinder *grinder = subport->grinder + pos;
>  	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
> +	uint32_t qindex = grinder->qindex[grinder->qpos];
>  	struct rte_mbuf *pkt = grinder->pkt;
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
>  	uint32_t be_tc_active;
> @@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
>  		(pkt_len * grinder->wrr_cost[grinder->qpos]) &
> be_tc_active;
> 
>  	if (queue->qr == queue->qw) {
> -		uint32_t qindex = grinder->qindex[grinder->qpos];
> -
>  		rte_bitmap_clear(subport->bmp, qindex);
>  		grinder->qmask &= ~(1 << grinder->qpos);
>  		if (be_tc_active)
>  			grinder->wrr_mask[grinder->qpos] = 0;
> +
>  		rte_sched_port_set_queue_empty_timestamp(port,
> subport, qindex);
>  	}
> 
> +#ifdef RTE_SCHED_AQM
> +	if (subport->aqm == RTE_SCHED_AQM_PIE)
> +		rte_sched_port_pie_dequeue(subport, qindex, pkt_len,
> port->time_cpu_cycles);
> +#endif
> +
>  	/* Reset pipe loop detection */
>  	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
>  	grinder->productive = 1;
> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> index c1a772b70c..a5fe6266cd 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -61,9 +61,10 @@ extern "C" {
>  #include <rte_mbuf.h>
>  #include <rte_meter.h>
> 
> -/** Random Early Detection (RED) */
> -#ifdef RTE_SCHED_RED
> +/** Active Queue Management */
> +#ifdef RTE_SCHED_AQM
>  #include "rte_red.h"
> +#include "rte_pie.h"
>  #endif
> 
>  /** Maximum number of queues per pipe.
> @@ -110,6 +111,28 @@ extern "C" {
>  #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
>  #endif
> 
> +/**
> + * Active Queue Management (AQM) mode
> + *
> + * This is used for controlling the admission of packets into a packet queue
> or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each packet
> color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
> + * drops a packet at the onset of the congestion and tries to control the
> + * latency around the target value. The congestion detection, however, is
> based
> + * on the queueing latency instead of the queue length like RED. For more
> + * information, refer RFC8033.
> + */
> +enum rte_sched_aqm_mode {
> +	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection
> (WRED) */
> +	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +
>  /*
>   * Pipe configuration parameters. The period and credits_per_period
>   * parameters are measured in bytes, with one byte meaning the time
> @@ -174,9 +197,17 @@ struct rte_sched_subport_params {
>  	/** Max allowed profiles in the pipe profile table */
>  	uint32_t n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -	/** RED parameters */
> -	struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_AQM
> +	/** Active Queue Management mode */
> +	enum rte_sched_aqm_mode aqm;
> +
> +	RTE_STD_C11
> +	union {
> +		/** WRED parameters */
> +		struct rte_red_params
> wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		/** PIE parameters */
> +		struct rte_pie_params
> pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
>  };
> 
> @@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
>  	/** Number of bytes dropped for each traffic class */
>  	uint64_t
> n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	/** Number of packets dropped by red */
> -	uint64_t
> n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +#ifdef RTE_SCHED_AQM
> +	/** Number of packets dropped by active queue management
> scheme */
> +	uint64_t
> n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  #endif
>  };
> 
> @@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
>  	/** Packets dropped */
>  	uint64_t n_pkts_dropped;
> 
> -#ifdef RTE_SCHED_RED
> -	/** Packets dropped by RED */
> -	uint64_t n_pkts_red_dropped;
> +#ifdef RTE_SCHED_AQM
> +	/** Packets dropped by active queue management scheme */
> +	uint64_t n_pkts_aqm_dropped;
>  #endif
> 
>  	/** Bytes successfully written */
> diff --git a/lib/sched/version.map b/lib/sched/version.map
> index ace284b7de..3422821ac8 100644
> --- a/lib/sched/version.map
> +++ b/lib/sched/version.map
> @@ -30,4 +30,7 @@ EXPERIMENTAL {
>  	rte_sched_subport_pipe_profile_add;
>  	# added in 20.11
>  	rte_sched_port_subport_profile_add;
> +
> +	rte_pie_rt_data_init;
> +	rte_pie_config_init;
>  };
> --
> 2.25.1

NACK

I see that none of my previous comments from the V4 review got implemented. Is there any reason to silently discard all of them?

https://patches.dpdk.org/project/dpdk/patch/20210705080421.18736-2-wojciechx.liguzinski@intel.com/

I did not see any reply from you on my comments, so I assumed that you accepted and implemented most of them, but I see that none of them were picked up.

Also, I don't see any revision history — just the version counter gets incremented — so reviewing a new version of your patch requires re-reading every line of code, which is time-consuming. Could you please add a revision history?

Thanks,
Cristian

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v9 1/5] sched: add PIE based congestion management
  2021-10-12 15:59                     ` Dumitrescu, Cristian
@ 2021-10-12 18:34                       ` Liguzinski, WojciechX
  2021-10-14 16:02                         ` Liguzinski, WojciechX
  0 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-12 18:34 UTC (permalink / raw)
  To: Dumitrescu, Cristian, dev, Singh, Jasvinder
  Cc: Ajmera, Megha, Cybura, LukaszX, Zegota, AnnaX

Hi Cristian,

-----Original Message-----
From: Dumitrescu, Cristian <cristian.dumitrescu@intel.com> 
Sent: Tuesday, October 12, 2021 6:00 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>
Cc: Ajmera, Megha <megha.ajmera@intel.com>
Subject: RE: [PATCH v9 1/5] sched: add PIE based congestion management

Hi Wojciech,

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 11, 2021 8:56 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; 
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v9 1/5] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 228 +++++++++----
>  lib/sched/rte_sched.h                    |  53 ++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 685 insertions(+), 90 deletions(-)  create mode 
> 100644 lib/sched/rte_pie.c  create mode 100644 lib/sched/rte_pie.h
> 
> diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c
> b/drivers/net/softnic/rte_eth_softnic_tm.c
> index 90baba15ce..5b6c4e6d4b 100644
> --- a/drivers/net/softnic/rte_eth_softnic_tm.c
> +++ b/drivers/net/softnic/rte_eth_softnic_tm.c
> @@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
>  	return 0;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
>  #define WRED_SUPPORTED						1
>  #else
>  #define WRED_SUPPORTED						0
> @@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, 
> uint32_t tc_id)
>  	return NULL;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static void
>  wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id) @@ 
> -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t 
> subport_id)
>  	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE;
> tc_id++)
>  		for (color = RTE_COLOR_GREEN; color < RTE_COLORS;
> color++) {
>  			struct rte_red_params *dst =
> -				&pp->red_params[tc_id][color];
> +				&pp->wred_params[tc_id][color];
>  			struct tm_wred_profile *src_wp =
>  				tm_tc_wred_profile_get(dev, tc_id);
>  			struct rte_tm_red_params *src =
> diff --git a/lib/sched/meson.build b/lib/sched/meson.build index 
> b24f7b8775..e7ae9bcf19 100644
> --- a/lib/sched/meson.build
> +++ b/lib/sched/meson.build
> @@ -1,11 +1,7 @@
>  # SPDX-License-Identifier: BSD-3-Clause  # Copyright(c) 2017 Intel 
> Corporation
> 
> -sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c') -headers 
> = files(
> -        'rte_approx.h',
> -        'rte_red.h',
> -        'rte_sched.h',
> -        'rte_sched_common.h',
> -)
> +sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 
> +'rte_pie.c') headers = files('rte_sched.h', 'rte_sched_common.h',
> +		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
>  deps += ['mbuf', 'meter']
> diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c new file mode 
> 100644 index 0000000000..2fcecb2db4
> --- /dev/null
> +++ b/lib/sched/rte_pie.c
> @@ -0,0 +1,82 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#include <stdlib.h>
> +
> +#include "rte_pie.h"
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_malloc.h>
> +
> +#ifdef __INTEL_COMPILER
> +#pragma warning(disable:2259) /* conversion may lose significant bits 
> +*/ #endif
> +
> +void
> +rte_pie_rt_data_init(struct rte_pie *pie) {
> +	if (pie == NULL) {
> +		/* Allocate memory to use the PIE data structure */
> +		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
> +
> +		if (pie == NULL)
> +			RTE_LOG(ERR, SCHED, "%s: Memory allocation
> fails\n", __func__);
> +	}
> +
> +	pie->active = 0;
> +	pie->in_measurement = 0;
> +	pie->departed_bytes_count = 0;
> +	pie->start_measurement = 0;
> +	pie->last_measurement = 0;
> +	pie->qlen = 0;
> +	pie->avg_dq_time = 0;
> +	pie->burst_allowance = 0;
> +	pie->qdelay_old = 0;
> +	pie->drop_prob = 0;
> +	pie->accu_prob = 0;
> +}
> +
> +int
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th)
> +{
> +	uint64_t tsc_hz = rte_get_tsc_hz();
> +
> +	if (pie_cfg == NULL)
> +		return -1;
> +
> +	if (qdelay_ref <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for qdelay_ref\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (dp_update_interval <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for dp_update_interval\n",
> __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (max_burst <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for max_burst\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (tailq_th <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for tailq_th\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
> +	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) /
> 1000;
> +	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
> +	pie_cfg->tailq_th = tailq_th;
> +
> +	return 0;
> +}
> diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h new file mode 
> 100644 index 0000000000..f83c95664f
> --- /dev/null
> +++ b/lib/sched/rte_pie.h
> @@ -0,0 +1,393 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#ifndef __RTE_PIE_H_INCLUDED__
> +#define __RTE_PIE_H_INCLUDED__
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * @file
> + * RTE Proportional Integral controller Enhanced (PIE)
> + *
> + *
> + ***/
> +
> +#include <stdint.h>
> +
> +#include <rte_random.h>
> +#include <rte_debug.h>
> +
> +#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
> +				     * to start measurement cycle (bytes)
> +				     */
> +#define RTE_DQ_WEIGHT      0.25    /**< Weight
> (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
> +#define RTE_ALPHA          0.125   /**< Weights in drop probability
> calculations */
> +#define RTE_BETA           1.25    /**< Weights in drop probability calculations
> */
> +#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number
> */
> +
> +
> +/**
> + * PIE configuration parameters passed by user
> + *
> + */
> +struct rte_pie_params {
> +	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
> +	uint16_t dp_update_interval;   /**< Update interval for drop
> probability (milliseconds) */
> +	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * PIE configuration parameters
> + *
> + */
> +struct rte_pie_config {
> +	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
> +	uint64_t dp_update_interval;   /**< Update interval for drop
> probability (in CPU cycles) */
> +	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * RED run-time data
> + */
> +struct rte_pie {
> +	uint16_t active;               /**< Flag for activating/deactivating pie */
> +	uint16_t in_measurement;       /**< Flag for activation of
> measurement cycle */
> +	uint32_t departed_bytes_count; /**< Number of bytes departed in
> current measurement cycle */
> +	uint64_t start_measurement;    /**< Time to start to measurement
> cycle (in cpu cycles) */
> +	uint64_t last_measurement;     /**< Time of last measurement (in
> cpu cycles) */
> +	uint64_t qlen;                 /**< Queue length (packets count) */
> +	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
> +	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in
> cpu cycles) */
> +	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
> +	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
> +	double drop_prob;              /**< Current packet drop probability */
> +	double accu_prob;              /**< Accumulated packet drop probability
> */
> +};
> +
> +/**
> + * @brief Initialises run-time data
> + *
> + * @param pie [in,out] data pointer to PIE runtime data  */ void 
> +__rte_experimental rte_pie_rt_data_init(struct rte_pie *pie);
> +
> +/**
> + * @brief Configures a single PIE configuration parameter structure.
> + *
> + * @param pie_cfg [in,out] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param qdelay_ref [in]  latency target(milliseconds)
> + * @param dp_update_interval [in] update interval for drop 
> + probability
> (milliseconds)
> + * @param max_burst [in] maximum burst allowance (milliseconds)
> + * @param tailq_th [in] tail drop threshold for the queue (number of
> packets)
> + *
> + * @return Operation status
> + * @retval 0 success
> + * @retval !0 error
> + */
> +int
> +__rte_experimental
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th);
> +
> +/**
> + * @brief Decides packet enqueue when queue is empty
> + *
> + * Note: packet is never dropped in this particular case.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval !0 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len)
> +{
> +	RTE_ASSERT(pkt_len != NULL);
> +
> +	/* Update the PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/**
> +	 * If the queue has been idle for a while, turn off PIE and Reset
> counters
> +	 */
> +	if ((pie->active == 1) &&
> +		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param time [in] current time (measured in cpu cycles)  */ static 
> +inline void __rte_experimental _calc_drop_probability(const struct 
> +rte_pie_config *pie_cfg,
> +	struct rte_pie *pie, uint64_t time)
> +{
> +	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
> +
> +	/* Note: can be implemented using integer multiply.
> +	 * DQ_THRESHOLD is power of 2 value.
> +	 */
> +	double current_qdelay = pie->qlen * (pie->avg_dq_time /
> RTE_DQ_THRESHOLD);
> +
> +	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
> +		RTE_BETA * (current_qdelay - pie->qdelay_old);
> +
> +	if (pie->drop_prob < 0.000001)
> +		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
> +	else if (pie->drop_prob < 0.00001)
> +		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
> +	else if (pie->drop_prob < 0.0001)
> +		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
> +	else if (pie->drop_prob < 0.001)
> +		p = p * 0.03125;                    /* (1/32) = 0.03125   */
> +	else if (pie->drop_prob < 0.01)
> +		p = p * 0.125;                      /* (1/8) = 0.125    */
> +	else if (pie->drop_prob < 0.1)
> +		p = p * 0.5;                        /* (1/2) = 0.5    */
> +
> +	if (pie->drop_prob >= 0.1 && p > 0.02)
> +		p = 0.02;
> +
> +	pie->drop_prob += p;
> +
> +	double qdelay = qdelay_ref * 0.5;
> +
> +	/*  Exponentially decay drop prob when congestion goes away  */
> +	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
> +		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
> +
> +	/* Bound drop probability */
> +	if (pie->drop_prob < 0)
> +		pie->drop_prob = 0;
> +	if (pie->drop_prob > 1)
> +		pie->drop_prob = 1;
> +
> +	pie->qdelay_old = current_qdelay;
> +	pie->last_measurement = time;
> +
> +	uint64_t burst_allowance = pie->burst_allowance - pie_cfg-
> >dp_update_interval;
> +
> +	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0; 
> +}
> +
> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + *
> + * @return operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +_rte_pie_drop(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie)
> +{
> +	uint64_t rand_value;
> +	double qdelay = pie_cfg->qdelay_ref * 0.5;
> +
> +	/* PIE is active but the queue is not congested: return 0 */
> +	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
> +		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
> +		return 0;
> +
> +	if (pie->drop_prob == 0)
> +		pie->accu_prob = 0;
> +
> +	/* For practical reasons, drop probability can be further scaled
> according
> +	 * to packet size, but one needs to set a bound to avoid unnecessary
> bias
> +	 * Random drop
> +	 */
> +	pie->accu_prob += pie->drop_prob;
> +
> +	if (pie->accu_prob < 0.85)
> +		return 0;
> +
> +	if (pie->accu_prob >= 8.5)
> +		return 1;
> +
> +	rand_value = rte_rand()/RTE_RAND_MAX;
> +
> +	if ((double)rand_value < pie->drop_prob) {
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqeued or dropped for non-
> empty queue
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> + parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on max threshold criterion
> + * @retval 2 drop the packet based on mark probability criterion  */ 
> +static inline int __rte_experimental rte_pie_enqueue_nonempty(const 
> +struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	/* Check queue space against the tail drop threshold */
> +	if (pie->qlen >= pie_cfg->tailq_th) {
> +
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	if (pie->active) {
> +		/* Update drop probability after certain interval */
> +		if ((time - pie->last_measurement) >= pie_cfg-
> >dp_update_interval)
> +			_calc_drop_probability(pie_cfg, pie, time);
> +
> +		/* Decide whether packet to be dropped or enqueued */
> +		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance ==
> 0)
> +			return 2;
> +	}
> +
> +	/* When queue occupancy is over a certain threshold, turn on PIE */
> +	if ((pie->active == 0) &&
> +		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
> +		pie->active = 1;
> +		pie->qdelay_old = 0;
> +		pie->drop_prob = 0;
> +		pie->in_measurement = 1;
> +		pie->departed_bytes_count = 0;
> +		pie->avg_dq_time = 0;
> +		pie->last_measurement = time;
> +		pie->burst_allowance = pie_cfg->max_burst;
> +		pie->accu_prob = 0;
> +		pie->start_measurement = time;
> +	}
> +
> +	/* when queue has been idle for a while, turn off PIE and Reset
> counters */
> +	if (pie->active == 1 &&
> +		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	/* Update PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqeued or dropped
> + * Updates run time data and gives verdict whether to enqueue or drop 
> +the
> packet.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> + parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param qlen [in] queue length
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on drop probility criteria  */ 
> +static inline int __rte_experimental rte_pie_enqueue(const struct 
> +rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	const unsigned int qlen,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	RTE_ASSERT(pie_cfg != NULL);
> +	RTE_ASSERT(pie != NULL);
> +
> +	if (qlen != 0)
> +		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len,
> time);
> +	else
> +		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len); }
> +
> +/**
> + * @brief PIE rate estimation method
> + * Called on each packet departure.
> + *
> + * @param pie [in] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp in cpu cycles  */ static 
> +inline void __rte_experimental rte_pie_dequeue(struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	uint64_t time)
> +{
> +	/* Dequeue rate estimation */
> +	if (pie->in_measurement) {
> +		pie->departed_bytes_count += pkt_len;
> +
> +		/* Start a new measurement cycle when enough packets */
> +		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
> +			uint64_t dq_time = time - pie->start_measurement;
> +
> +			if (pie->avg_dq_time == 0)
> +				pie->avg_dq_time = dq_time;
> +			else
> +				pie->avg_dq_time = dq_time *
> RTE_DQ_WEIGHT + pie->avg_dq_time
> +					* (1 - RTE_DQ_WEIGHT);
> +
> +			pie->in_measurement = 0;
> +		}
> +	}
> +
> +	/* Start measurement cycle when enough data in the queue */
> +	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie-
> >in_measurement == 0)) {
> +		pie->in_measurement = 1;
> +		pie->start_measurement = time;
> +		pie->departed_bytes_count = 0;
> +	}
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* __RTE_PIE_H_INCLUDED__ */
> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c index 
> a858f61f95..320435ed91 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c
> @@ -89,8 +89,12 @@ struct rte_sched_queue {
> 
>  struct rte_sched_queue_extra {
>  	struct rte_sched_queue_stats stats;
> -#ifdef RTE_SCHED_RED
> -	struct rte_red red;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red red;
> +		struct rte_pie pie;
> +	};
>  #endif
>  };
> 
> @@ -183,8 +187,13 @@ struct rte_sched_subport {
>  	/* Pipe queues size */
>  	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +	enum rte_sched_aqm_mode aqm;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red_config
> wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		struct rte_pie_config
> pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
> 
>  	/* Scheduling loop detection */
> @@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port 
> *port, uint32_t n_subports)
>  	rte_free(port);
>  }
> 
> +#ifdef RTE_SCHED_AQM
> +
> +static int
> +rte_sched_red_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +
> +		uint32_t j;
> +
> +		for (j = 0; j < RTE_COLORS; j++) {
> +			/* if min/max are both zero, then RED is disabled */
> +			if ((params->wred_params[i][j].min_th |
> +				 params->wred_params[i][j].max_th) == 0) {
> +				continue;
> +			}
> +
> +			if (rte_red_config_init(&s->wred_config[i][j],
> +				params->wred_params[i][j].wq_log2,
> +				params->wred_params[i][j].min_th,
> +				params->wred_params[i][j].max_th,
> +				params->wred_params[i][j].maxp_inv) != 0) {
> +				rte_sched_free_memory(port, n_subports);
> +
> +				RTE_LOG(NOTICE, SCHED,
> +				"%s: RED configuration init fails\n",
> __func__);
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	s->aqm = RTE_SCHED_AQM_WRED;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_pie_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE tailq threshold incorrect\n", __func__);
> +			return -EINVAL;
> +		}
> +
> +		if (rte_pie_config_init(&s->pie_config[i],
> +			params->pie_params[i].qdelay_ref,
> +			params->pie_params[i].dp_update_interval,
> +			params->pie_params[i].max_burst,
> +			params->pie_params[i].tailq_th) != 0) {
> +			rte_sched_free_memory(port, n_subports);
> +
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE configuration init fails\n", __func__);
> +			return -EINVAL;
> +			}
> +	}
> +	s->aqm = RTE_SCHED_AQM_PIE;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_aqm_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	if (params->aqm == RTE_SCHED_AQM_WRED)
> +		return rte_sched_red_config(port, s, params, n_subports);
> +
> +	else if (params->aqm == RTE_SCHED_AQM_PIE)
> +		return rte_sched_pie_config(port, s, params, n_subports);
> +
> +	return -EINVAL;
> +}
> +#endif
> +
>  int
>  rte_sched_subport_config(struct rte_sched_port *port,
>  	uint32_t subport_id,
> @@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port 
> *port,
>  		s->n_pipe_profiles = params->n_pipe_profiles;
>  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -			uint32_t j;
> -
> -			for (j = 0; j < RTE_COLORS; j++) {
> -			/* if min/max are both zero, then RED is disabled */
> -				if ((params->red_params[i][j].min_th |
> -				     params->red_params[i][j].max_th) == 0) {
> -					continue;
> -				}
> -
> -				if (rte_red_config_init(&s->red_config[i][j],
> -				    params->red_params[i][j].wq_log2,
> -				    params->red_params[i][j].min_th,
> -				    params->red_params[i][j].max_th,
> -				    params->red_params[i][j].maxp_inv) != 0)
> {
> -					RTE_LOG(NOTICE, SCHED,
> -					"%s: RED configuration init fails\n",
> -					__func__);
> -					ret = -EINVAL;
> -					goto out;
> -				}
> -			}
> +#ifdef RTE_SCHED_AQM
> +		status = rte_sched_aqm_config(port, s, params,
> n_subports);
> +		if (status) {
> +			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration
> fails\n", __func__);
> +			return status;
>  		}
>  #endif
> 
> @@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct
> rte_sched_port *port,
>  	subport->stats.n_bytes_tc[tc_index] += pkt_len;  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> *port,
> -	struct rte_sched_subport *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
>  	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len; -#ifdef 
> RTE_SCHED_RED
> -	subport->stats.n_pkts_red_dropped[tc_index] += red;
> +#ifdef RTE_SCHED_AQM
> +	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
>  #endif
>  }
> 
> @@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct
> rte_sched_subport *subport,
>  	qe->stats.n_bytes += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	qe->stats.n_pkts_dropped += 1;
>  	qe->stats.n_bytes_dropped += pkt_len; -#ifdef RTE_SCHED_RED
> -	qe->stats.n_pkts_red_dropped += red;
> +#ifdef RTE_SCHED_AQM
> +	qe->stats.n_pkts_aqm_dropped += drops;
>  #endif
>  }
> 
>  #endif /* RTE_SCHED_COLLECT_STATS */
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static inline int
> -rte_sched_port_red_drop(struct rte_sched_port *port,
> +rte_sched_port_aqm_drop(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport,
>  	struct rte_mbuf *pkt,
>  	uint32_t qindex,
>  	uint16_t qlen)
>  {
>  	struct rte_sched_queue_extra *qe;
> -	struct rte_red_config *red_cfg;
> -	struct rte_red *red;
>  	uint32_t tc_index;
> -	enum rte_color color;
> 
>  	tc_index = rte_sched_port_pipe_tc(port, qindex);
> -	color = rte_sched_port_pkt_read_color(pkt);
> -	red_cfg = &subport->red_config[tc_index][color];
> +	qe = subport->queue_extra + qindex;
> 
> -	if ((red_cfg->min_th | red_cfg->max_th) == 0)
> -		return 0;
> +	/* WRED */
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red_config *red_cfg;
> +		struct rte_red *red;
> +		enum rte_color color;
> 
> -	qe = subport->queue_extra + qindex;
> -	red = &qe->red;
> +		color = rte_sched_port_pkt_read_color(pkt);
> +		red_cfg = &subport->wred_config[tc_index][color];
> +
> +		if ((red_cfg->min_th | red_cfg->max_th) == 0)
> +			return 0;
> 
> -	return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +		red = &qe->red;
> +
> +		return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +	}
> +
> +	/* PIE */
> +	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
> +	struct rte_pie *pie = &qe->pie;
> +
> +	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port-
> >time_cpu_cycles);
>  }
> 
>  static inline void
> @@ -1815,14 +1885,29 @@
> rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport, uint32_t qindex)  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
> -	struct rte_red *red = &qe->red;
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red *red = &qe->red;
> +
> +		rte_red_mark_queue_empty(red, port->time);
> +	}
> +}
> +
> +static inline void
> +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport, 
> +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> +	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> +	struct rte_pie *pie = &qe->pie;
> 
> -	rte_red_mark_queue_empty(red, port->time);
> +	/* Update queue length */
> +	pie->qlen -= 1;
> +	pie->qlen_bytes -= pkt_len;
> +
> +	rte_pie_dequeue(pie, pkt_len, time);
>  }
> 
>  #else
> 
> -static inline int rte_sched_port_red_drop(struct rte_sched_port *port 
> __rte_unused,
> +static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port
> __rte_unused,
>  	struct rte_sched_subport *subport __rte_unused,
>  	struct rte_mbuf *pkt __rte_unused,
>  	uint32_t qindex __rte_unused,
> @@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct 
> rte_sched_port *port __rte_unus
> 
>  #define rte_sched_port_set_queue_empty_timestamp(port, subport,
> qindex)
> 
> -#endif /* RTE_SCHED_RED */
> +#endif /* RTE_SCHED_AQM */
> 
>  #ifdef RTE_SCHED_DEBUG
> 
> @@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port 
> *port,
>  	qlen = q->qw - q->qr;
> 
>  	/* Drop the packet (and update drop stats) when queue is full */
> -	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex,
> qlen) ||
> +	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex,
> qlen) ||
>  		     (qlen >= qsize))) {
>  		rte_pktmbuf_free(pkt);
>  #ifdef RTE_SCHED_COLLECT_STATS
> @@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,  {
>  	struct rte_sched_grinder *grinder = subport->grinder + pos;
>  	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
> +	uint32_t qindex = grinder->qindex[grinder->qpos];
>  	struct rte_mbuf *pkt = grinder->pkt;
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
>  	uint32_t be_tc_active;
> @@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
>  		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
> 
>  	if (queue->qr == queue->qw) {
> -		uint32_t qindex = grinder->qindex[grinder->qpos];
> -
>  		rte_bitmap_clear(subport->bmp, qindex);
>  		grinder->qmask &= ~(1 << grinder->qpos);
>  		if (be_tc_active)
>  			grinder->wrr_mask[grinder->qpos] = 0;
> +
>  		rte_sched_port_set_queue_empty_timestamp(port,
> subport, qindex);
>  	}
> 
> +#ifdef RTE_SCHED_AQM
> +	if (subport->aqm == RTE_SCHED_AQM_PIE)
> +		rte_sched_port_pie_dequeue(subport, qindex, pkt_len,
> port->time_cpu_cycles);
> +#endif
> +
>  	/* Reset pipe loop detection */
>  	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
>  	grinder->productive = 1;
> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h index 
> c1a772b70c..a5fe6266cd 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -61,9 +61,10 @@ extern "C" {
>  #include <rte_mbuf.h>
>  #include <rte_meter.h>
> 
> -/** Random Early Detection (RED) */
> -#ifdef RTE_SCHED_RED
> +/** Active Queue Management */
> +#ifdef RTE_SCHED_AQM
>  #include "rte_red.h"
> +#include "rte_pie.h"
>  #endif
> 
>  /** Maximum number of queues per pipe.
> @@ -110,6 +111,28 @@ extern "C" {
>  #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
>  #endif
> 
> +/**
> + * Active Queue Management (AQM) mode
> + *
> + * This is used for controlling the admission of packets into a 
> +packet queue
> or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The 
> + *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each 
> + packet
> color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE) 
> + randomly
> + * drops a packet at the onset of the congestion and tries to control 
> + the
> + * latency around the target value. The congestion detection, 
> + however, is
> based
> + * on the queueing latency instead of the queue length like RED. For 
> +more
> + * information, refer RFC8033.
> + */
> +enum rte_sched_aqm_mode {
> +	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection
> (WRED) */
> +	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +
>  /*
>   * Pipe configuration parameters. The period and credits_per_period
>   * parameters are measured in bytes, with one byte meaning the time 
> @@ -174,9 +197,17 @@ struct rte_sched_subport_params {
>  	/** Max allowed profiles in the pipe profile table */
>  	uint32_t n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -	/** RED parameters */
> -	struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_AQM
> +	/** Active Queue Management mode */
> +	enum rte_sched_aqm_mode aqm;
> +
> +	RTE_STD_C11
> +	union {
> +		/** WRED parameters */
> +		struct rte_red_params
> wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		/** PIE parameters */
> +		struct rte_pie_params
> pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
>  };
> 
> @@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
>  	/** Number of bytes dropped for each traffic class */
>  	uint64_t
> n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	/** Number of packets dropped by red */
> -	uint64_t
> n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +#ifdef RTE_SCHED_AQM
> +	/** Number of packets dropped by active queue management
> scheme */
> +	uint64_t
> n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  #endif
>  };
> 
> @@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
>  	/** Packets dropped */
>  	uint64_t n_pkts_dropped;
> 
> -#ifdef RTE_SCHED_RED
> -	/** Packets dropped by RED */
> -	uint64_t n_pkts_red_dropped;
> +#ifdef RTE_SCHED_AQM
> +	/** Packets dropped by active queue management scheme */
> +	uint64_t n_pkts_aqm_dropped;
>  #endif
> 
>  	/** Bytes successfully written */
> diff --git a/lib/sched/version.map b/lib/sched/version.map index 
> ace284b7de..3422821ac8 100644
> --- a/lib/sched/version.map
> +++ b/lib/sched/version.map
> @@ -30,4 +30,7 @@ EXPERIMENTAL {
>  	rte_sched_subport_pipe_profile_add;
>  	# added in 20.11
>  	rte_sched_port_subport_profile_add;
> +
> +	rte_pie_rt_data_init;
> +	rte_pie_config_init;
>  };
> --
> 2.25.1

NACK

I see that none of my previous comments from the V4 review got implemented, is there any reason to silently discard all of them?

https://patches.dpdk.org/project/dpdk/patch/20210705080421.18736-2-wojciechx.liguzinski@intel.com/

I did not see any reply from you on my comments, so I assumed that you accepted and implemented most of them, but I see that none of them were picked up.

Also, I don't see any revision history, just the version counter gets incremented, so reviewing a new version of your patch requires re-reading every line of code, which is time consuming. Could you please add a revision history?

Thanks,
Cristian

------------------------------------

First thing - I'm very sorry that I haven't replied to your comments. It was not my intention to ignore them.
I was going through them and I was a bit confused that most of your suggestions would actually revert the majority of my changes suggested by other community members, e.g. by Stephen Hemminger.
I wanted to get some opinion how to proceed but I got some additional tasks on the way, so - my fault that I was postponing that. Again - apologies.

I will go through them again and implement/respond to them as quickly as possible.

I was uploading the patches following the procedure that was presented to me (rebasing the changes so the new ones are applied to existing commits), so honestly I was not aware that I needed to provide a revision history when submitting another version of the patches.

Let me think of a way to provide you with such revision history.

Thanks,
Wojtek

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v10 0/5] Add PIE support for HQoS library
  2021-10-11  7:55                 ` [dpdk-dev] [PATCH v9 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                     ` (4 preceding siblings ...)
  2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-14 11:34                   ` Liguzinski, WojciechX
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                       ` (5 more replies)
  5 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 11:34 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency 
variation. Currently, it supports RED for active queue management (which is designed 
to control the queue length but it does not control latency directly and is now being 
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address 
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and 
adding a new set of data structures to the library, adding PIE related APIs. 
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/autotest_data.py                    |   18 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |   82 +-
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  241 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    3 +
 20 files changed, 2154 insertions(+), 268 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v10 1/5] sched: add PIE based congestion management
  2021-10-14 11:34                   ` [dpdk-dev] [PATCH v10 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-10-14 11:34                     ` Liguzinski, WojciechX
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                       ` (4 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 11:34 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 241 +++++++++-----
 lib/sched/rte_sched.h                    |  63 +++-
 lib/sched/version.map                    |   3 +
 7 files changed, 702 insertions(+), 96 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+/* Initialise PIE run-time data to a clean (inactive) state. */
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/*
+		 * Allocating here is useless: 'pie' is passed by value, so
+		 * the caller would never see the new buffer (memory leak),
+		 * and a failed rte_malloc() would make the code below
+		 * dereference NULL. Treat a NULL argument as a caller error.
+		 */
+		RTE_LOG(ERR, SCHED, "%s: Invalid addr for pie\n", __func__);
+		return;
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->qlen_bytes = 0; /* was missing: stale value skews dequeue-rate measurement */
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+/*
+ * Convert the user-supplied parameters (milliseconds) to CPU cycles and
+ * store them in the PIE configuration. Returns 0 on success, -EINVAL on
+ * any invalid argument.
+ */
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	/* Return -EINVAL (was -1) so all error paths report consistently */
+	if (pie_cfg == NULL)
+		return -EINVAL;
+
+	/* Parameters are unsigned, so '<= 0' could only ever mean zero */
+	if (qdelay_ref == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Convert milliseconds to TSC cycles */
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by the user (all time values in
+ * milliseconds; converted to CPU cycles by rte_pie_config_init())
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters (internal representation, time values
+ * pre-converted to CPU cycles)
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data, one instance per queue
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (in cpu cycles; NOTE(review): assigned from 64-bit max_burst — confirm no truncation) */
+	uint64_t qdelay_old;           /**< Old queue delay (in cpu cycles, not bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is an integer, not a pointer: compare against 0,
+	 * not NULL as the previous assertion did.
+	 */
+	RTE_ASSERT(pkt_len != 0);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Updates the PIE drop probability
+ *
+ * Called once per dp_update_interval; implements the drop probability
+ * calculation of RFC 8033 section 4.2 (the previous header wrongly
+ * described this as the per-packet drop decision).
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Estimated queue delay = qlen * avg_dq_time / RTE_DQ_THRESHOLD.
+	 * Divide in floating point: the previous integer division
+	 * (avg_dq_time / RTE_DQ_THRESHOLD) truncated to 0 whenever
+	 * avg_dq_time < RTE_DQ_THRESHOLD cycles, zeroing the estimate.
+	 */
+	double current_qdelay = pie->qlen *
+		((double)pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	/* Auto-tune the adjustment as per RFC 8033 */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Decrement the burst allowance, saturating at zero. The previous
+	 * code subtracted in unsigned arithmetic and then tested '> 0',
+	 * which is always true after wrap-around, so the allowance never
+	 * actually expired.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief make a random decision whether to drop an incoming packet
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Scale the random number into [0, 1] in floating point: the
+	 * previous integer division rte_rand()/RTE_RAND_MAX was almost
+	 * always 0, so every packet reaching this point got dropped.
+	 */
+	rand_value = (double)rte_rand() / RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if a new packet should be enqueued or dropped for a non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued.
+		 * NOTE(review): _rte_pie_drop() runs first, so accu_prob keeps
+		 * accumulating even while burst allowance suppresses the drop
+		 * — confirm this ordering is intended (see RFC 8033 sec 4.1).
+		 */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold (10% of the tail
+	 * drop threshold), turn on PIE and reset the measurement state */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if a new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* An empty queue never drops; dispatch to the cheap path first */
+	if (qlen == 0)
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+
+	return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure; maintains the exponentially weighted
+ * average dequeue time (avg_dq_time) consumed by _calc_drop_probability().
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* A measurement cycle ends once RTE_DQ_THRESHOLD bytes have
+		 * departed; dq_time is how long that drain took */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			/* EWMA with weight RTE_DQ_WEIGHT; the double result is
+			 * truncated back to uint64_t cpu cycles on assignment */
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..d652663fe4 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,14 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman;
+	
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1088,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+
+/* Initialise one RED configuration per (traffic class, color) pair.
+ * Frees the port memory and returns -EINVAL on any init failure.
+ */
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i, j;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		for (j = 0; j < RTE_COLORS; j++) {
+			struct rte_red_params *red =
+				&params->cman_params->red_params[i][j];
+
+			/* if min/max are both zero, then RED is disabled */
+			if ((red->min_th | red->max_th) == 0)
+				continue;
+
+			if (rte_red_config_init(&s->red_config[i][j],
+					red->wq_log2,
+					red->min_th,
+					red->max_th,
+					red->maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_WRED;
+	return 0;
+}
+
+/* Initialise one PIE configuration per traffic class.
+ * Frees the port memory and returns -EINVAL on any failure.
+ */
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			/* Free allocated memory on this error path too, to
+			 * match rte_sched_red_config() and the init-failure
+			 * path below (this path was leaking before).
+			 */
+			rte_sched_free_memory(port, n_subports);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+/* Dispatch to the configured congestion-management initialiser. */
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	switch (params->cman_params->cman_mode) {
+	case RTE_SCHED_CMAN_WRED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_CMAN_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1262,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,30 +1795,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1822,94 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	} else {
+
+		/* PIE */
+		struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+		struct rte_pie *pie = &qe->pie;
+		
+		return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
+	}
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman == RTE_SCHED_CMAN_PIE){
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1918,15 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2022,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2495,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2515,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..07fcf439d8 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer to RFC 8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,12 +197,30 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion Management parameters */
+	struct rte_sched_cman_params *cman_params;
 #endif
 };
 
+#ifdef RTE_SCHED_CMAN
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** WRED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+#endif
+
 struct rte_sched_subport_profile_params {
 	/** Token bucket rate (measured in bytes per second) */
 	uint64_t tb_rate;
@@ -208,10 +249,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +261,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v10 2/5] example/qos_sched: add PIE support
  2021-10-14 11:34                   ` [dpdk-dev] [PATCH v10 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-14 11:34                     ` Liguzinski, WojciechX
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                       ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 11:34 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling PIE or RED by
parsing the config file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  82 ++++++++++---
 examples/qos_sched/init.c       |  27 +++--
 examples/qos_sched/main.h       |   3 +
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 6 files changed, 216 insertions(+), 94 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..5e82866dce 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_mode = RTE_SCHED_CMAN_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,30 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+			subport_params[i].cman_params->cman_mode = cman_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].cman_params->cman_mode == RTE_SCHED_CMAN_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].cman_params->red_params[j][k].min_th =
+							red_params[j][k].min_th;
+						subport_params[i].cman_params->red_params[j][k].max_th =
+							red_params[j][k].max_th;
+						subport_params[i].cman_params->red_params[j][k].maxp_inv =
+							red_params[j][k].maxp_inv;
+						subport_params[i].cman_params->red_params[j][k].wq_log2 =
+							red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].cman_params->pie_params[j].qdelay_ref =
+						pie_params[j].qdelay_ref;
+					subport_params[i].cman_params->pie_params[j].dp_update_interval =
+						pie_params[j].dp_update_interval;
+					subport_params[i].cman_params->pie_params[j].max_burst =
+						pie_params[j].max_burst;
+					subport_params[i].cman_params->pie_params[j].tailq_th =
+						pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..94bad349e8 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -204,15 +204,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_WRED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +273,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v10 3/5] example/ip_pipeline: add PIE support
  2021-10-14 11:34                   ` [dpdk-dev] [PATCH v10 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-14 11:34                     ` Liguzinski, WojciechX
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                       ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 11:34 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v10 4/5] doc/guides/prog_guide: added PIE
  2021-10-14 11:34                   ` [dpdk-dev] [PATCH v10 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                       ` (2 preceding siblings ...)
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-14 11:34                     ` Liguzinski, WojciechX
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-10-14 12:38                     ` [dpdk-dev] [PATCH v11 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 11:34 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/2^9.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: packets            |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+These parameters are specified in the format expected by the dropper module API.
+They could be made self-calculated for fine tuning within the apps.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED) and Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. Calculated drop probability is updated periodically,
+based on the measured latency, the desired latency, and whether the queuing latency
+is currently trending up or down. Queuing latency can be obtained using direct
+measurement or from estimations calculated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v10 5/5] app/test: add tests for PIE
  2021-10-14 11:34                   ` [dpdk-dev] [PATCH v10 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                       ` (3 preceding siblings ...)
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-14 11:34                     ` Liguzinski, WojciechX
  2021-10-14 12:38                     ` [dpdk-dev] [PATCH v11 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 11:34 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/autotest_data.py |   18 +
 app/test/meson.build      |    4 +
 app/test/test_pie.c       | 1065 +++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c       |    6 +-
 lib/sched/rte_pie.h       |   17 +-
 5 files changed, 1103 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 302d6374c1..1d4418b6a3 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -279,6 +279,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie autotest",
+        "Command": "pie_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "PMD ring autotest",
         "Command": "ring_pmd_autotest",
@@ -525,6 +531,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie all",
+        "Command": "red_all",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
 	"Name":    "Fbarray autotest",
 	"Command": "fbarray_autotest",
@@ -731,6 +743,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie_perf",
+        "Command": "pie_perf",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Lpm6 perf autotest",
         "Command": "lpm6_perf_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index a7611686ad..f224b0c17e 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -111,6 +111,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -241,6 +242,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -292,6 +294,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -305,6 +308,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Average dequeue time of the queue */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft3_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_tests - num_pass);
+}
+
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	return (num_pass == num_tests) ? 0 : 1;
+}
+
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v11 0/5] Add PIE support for HQoS library
  2021-10-14 11:34                   ` [dpdk-dev] [PATCH v10 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                       ` (4 preceding siblings ...)
  2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-14 12:38                     ` Liguzinski, WojciechX
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                         ` (5 more replies)
  5 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 12:38 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

The DPDK sched library is equipped with a mechanism that protects it from the bufferbloat
problem, which is a situation in which excess buffers in the network cause high latency and
latency variation. Currently, it supports RED for active queue management (which is designed
to control the queue length, but does not control latency directly and is now being
obsoleted). However, more advanced queue management is required to address this problem
and provide a desirable quality of service to users.

This solution proposes usage of a new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of the mentioned functionality includes modification of existing data
structures and addition of a new set of data structures to the library, as well as adding
PIE-related APIs. This affects structures in the public API/ABI. That is why a deprecation
notice is going to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/autotest_data.py                    |   18 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |   83 +-
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  240 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    3 +
 20 files changed, 2154 insertions(+), 268 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v11 1/5] sched: add PIE based congestion management
  2021-10-14 12:38                     ` [dpdk-dev] [PATCH v11 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-10-14 12:38                       ` Liguzinski, WojciechX
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                         ` (4 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 12:38 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 240 +++++++++-----
 lib/sched/rte_sched.h                    |  63 +++-
 lib/sched/version.map                    |   3 +
 7 files changed, 701 insertions(+), 96 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Recalculate the PIE packet-drop probability; called once per
+ *        dp_update_interval
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if a new packet should be enqueued or dropped for a non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if a new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..a066eed186 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,14 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1088,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->cman_params->cman_mode == RTE_SCHED_CMAN_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1262,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,30 +1795,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1822,93 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1917,15 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2021,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2494,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2514,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..07fcf439d8 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,12 +197,30 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion Management parameters */
+	struct rte_sched_cman_params *cman_params;
 #endif
 };
 
+#ifdef RTE_SCHED_CMAN
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** WRED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+#endif
+
 struct rte_sched_subport_profile_params {
 	/** Token bucket rate (measured in bytes per second) */
 	uint64_t tb_rate;
@@ -208,10 +249,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +261,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v11 2/5] example/qos_sched: add PIE support
  2021-10-14 12:38                     ` [dpdk-dev] [PATCH v11 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-14 12:38                       ` Liguzinski, WojciechX
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                         ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 12:38 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling either PIE or RED congestion
management, selected by parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   |  83 +++++++++++---
 examples/qos_sched/init.c       |  27 +++--
 examples/qos_sched/main.h       |   3 +
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 6 files changed, 217 insertions(+), 94 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..8b05c7a80d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,20 +242,20 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman_mode;
 
-	snprintf(sec_name, sizeof(sec_name), "red");
+	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_mode = RTE_SCHED_CMAN_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
@@ -315,7 +315,42 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].qdelay_ref = (uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].max_burst = (uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].dp_update_interval = (uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				pie_params[i].tailq_th = (uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,17 +428,31 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+			subport_params[i].cman_params->cman_mode = cman_mode;
+
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
+				if (subport_params[i].cman_params->cman_mode ==
+					RTE_SCHED_CMAN_WRED) {
+					for (k = 0; k < RTE_COLORS; k++) {
+						subport_params[i].cman_params->red_params[j][k] \
+							.min_th = red_params[j][k].min_th;
+						subport_params[i].cman_params->red_params[j][k] \
+							.max_th = red_params[j][k].max_th;
+						subport_params[i].cman_params->red_params[j][k] \
+							.maxp_inv = red_params[j][k].maxp_inv;
+						subport_params[i].cman_params->red_params[j][k] \
+							.wq_log2 = red_params[j][k].wq_log2;
+					}
+				} else {
+					subport_params[i].cman_params->pie_params[j] \
+						.qdelay_ref = pie_params[j].qdelay_ref;
+					subport_params[i].cman_params->pie_params[j] \
+						.dp_update_interval = pie_params[j].dp_update_interval;
+					subport_params[i].cman_params->pie_params[j] \
+						.max_burst = pie_params[j].max_burst;
+					subport_params[i].cman_params->pie_params[j] \
+						.tailq_th = pie_params[j].tailq_th;
 				}
 			}
 #endif
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..94bad349e8 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -204,15 +204,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_WRED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +273,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v11 3/5] example/ip_pipeline: add PIE support
  2021-10-14 12:38                     ` [dpdk-dev] [PATCH v11 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-14 12:38                       ` Liguzinski, WojciechX
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                         ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 12:38 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Add PIE support to the IP Pipeline example application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v11 4/5] doc/guides/prog_guide: added PIE
  2021-10-14 12:38                     ` [dpdk-dev] [PATCH v11 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                         ` (2 preceding siblings ...)
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-14 12:38                       ` Liguzinski, WojciechX
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-10-14 15:11                       ` [dpdk-dev] [PATCH v12 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 12:38 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It uses not only the current latency sample but also analyzes whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+This is the format in which these parameters are specified to the dropper module API.
+They could be made self-calculated, for fine tuning, within the apps.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. Calculated drop probability is updated periodically,
+based on the measured and the desired latency, and on whether the queuing latency is currently
+trending up or down. Queuing latency can be obtained using direct measurement or
+on estimations calculated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v11 5/5] app/test: add tests for PIE
  2021-10-14 12:38                     ` [dpdk-dev] [PATCH v11 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                         ` (3 preceding siblings ...)
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-14 12:38                       ` Liguzinski, WojciechX
  2021-10-14 15:11                       ` [dpdk-dev] [PATCH v12 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 12:38 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/autotest_data.py |   18 +
 app/test/meson.build      |    4 +
 app/test/test_pie.c       | 1065 +++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c       |    6 +-
 lib/sched/rte_pie.h       |   17 +-
 5 files changed, 1103 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 302d6374c1..1d4418b6a3 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -279,6 +279,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie autotest",
+        "Command": "pie_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "PMD ring autotest",
         "Command": "ring_pmd_autotest",
@@ -525,6 +531,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie all",
+        "Command": "red_all",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
 	"Name":    "Fbarray autotest",
 	"Command": "fbarray_autotest",
@@ -731,6 +743,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie_perf",
+        "Command": "pie_perf",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Lpm6 perf autotest",
         "Command": "lpm6_perf_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index a7611686ad..f224b0c17e 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -111,6 +111,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -241,6 +242,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -292,6 +294,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -305,6 +308,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
/**
 * Check whether the measured drop rate matches the drop probability within
 * the given tolerance (percent). On success *diff holds the relative
 * difference in percent; returns 1 on match, 0 otherwise.
 *
 * Bug fix: the original tested "(int)abs_diff == 0", which truncates any
 * difference below 1.0 to zero. Since both arguments are probabilities in
 * [0, 1], the tolerance comparison was dead code and the check always
 * passed. Compare against zero directly and guard the division instead.
 */
static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
							double tolerance)
{
	double abs_diff = fabs(drop_rate - drop_prob);

	/* exact match, or no reference probability to divide by */
	if (abs_diff == 0.0 || drop_prob == 0.0) {
		*diff = 0.0;
		return 1;
	}

	*diff = (abs_diff / drop_prob) * 100.0;

	return (*diff <= tolerance) ? 1 : 0;
}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
/**
 * setup default values for the Performance test structures
 */
static struct rte_pie_config pt_wrconfig[1];	/* PIE configuration under test */
static struct rte_pie pt_rtdata[1];		/* enqueue-side PIE runtime data */
static struct rte_pie pt_wtdata[1];		/* dequeue-side PIE runtime data */
static uint32_t pt_q[] = {0};			/* queue length (packets) */
static uint32_t pt_dropped[] = {0};		/* dropped-packet counter */
static uint32_t pt_enqueued[] = {0};		/* enqueued-packet counter */
static uint32_t pt_dequeued[] = {0};		/* dequeued-packet counter */
static uint16_t pt_max_burst[] = {64};		/* max burst allowance (milliseconds) */
static uint16_t pt_dp_update_interval[] = {150};	/* drop-probability update interval (milliseconds) */

/* Configuration shared by performance test 1 (enqueue only) */
static struct test_rte_pie_config pt_tconfig =  {
	.pconfig = pt_wrconfig,
	.num_cfg = RTE_DIM(pt_wrconfig),
	.qdelay_ref = 15,
	.dp_update_interval = pt_dp_update_interval,
	.max_burst = pt_max_burst,
	.tailq_th = 150,
};

static struct test_queue pt_tqueue = {
	.pdata_in = pt_rtdata,
	.num_queues = RTE_DIM(pt_rtdata),
	.qlen = pt_q,
	.q_ramp_up = 1000000,
	.drop_tolerance = 0,  /* 0 percent */
};

/* Configuration for performance test 2 (enqueue + dequeue).
 * NOTE(review): field-for-field identical to pt_tconfig; kept separate so
 * the two tests stay independently tunable.
 */
static struct test_rte_pie_config pt_tconfig2 =  {
	.pconfig = pt_wrconfig,
	.num_cfg = RTE_DIM(pt_wrconfig),
	.qdelay_ref = 15,
	.dp_update_interval = pt_dp_update_interval,
	.max_burst = pt_max_burst,
	.tailq_th = 150,
};

static struct test_queue pt_tqueue2 = {
	.pdata_in = pt_rtdata,
	.pdata_out = pt_wtdata,	/* dequeue side exercised by perf test 2 */
	.num_queues = RTE_DIM(pt_rtdata),
	.qlen = pt_q,
	.q_ramp_up = 1000000,
	.drop_tolerance = 0,  /* 0 percent */
};
+
/**
 * enqueue/dequeue packets
 * aka
 *  rte_sched_port_enqueue(port, in_mbufs, 10);
 *	rte_sched_port_dequeue(port, out_mbufs, 10);
 *
 * Drives num_ops enqueue decisions (and, when pie_out is non-NULL, one
 * dequeue per iteration) through the PIE API while timing each call with
 * the rdtsc profiler.  Every simulated packet uses a fixed length of
 * 1000 * sizeof(uint32_t) bytes.  *enqueued / *dropped / *dequeued are
 * incremented according to the verdicts; *dequeued is touched only when
 * pie_out is provided.
 */
static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
				 struct rte_pie *pie_in,
				 struct rte_pie *pie_out,
				 uint32_t *qlen,
				 uint32_t num_ops,
				 uint32_t *enqueued,
				 uint32_t *dropped,
				 uint32_t *dequeued,
				 struct rdtsc_prof *prof)
{
	uint32_t i = 0;

	/* Guard against missing configuration / runtime data */
	if (pie_cfg == NULL) {
		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
		return;
	}

	if (pie_in == NULL) {
		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
		return;
	}

	for (i = 0; i < num_ops; i++) {
		uint64_t ts = 0;
		int ret = 0;

		/**
		 * enqueue
		 */
		ts = get_port_ts();
		rdtsc_prof_start(prof);
		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
								1000*sizeof(uint32_t), ts);
		rdtsc_prof_end(prof);

		/* rte_pie_enqueue() returns 0 to enqueue, non-zero to drop */
		if (ret == 0)
			(*enqueued)++;
		else
			(*dropped)++;

		if (pie_out != NULL) {
			ts = get_port_ts();
			rdtsc_prof_start(prof);
			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
			rdtsc_prof_end(prof);

			(*dequeued)++;
		}
	}
}
+
/**
 * Setup test structures for tests P1
 * performance tests 1
 */
static uint32_t pt1_tlevel[] = {80};	/* fill level (percent) */

static struct test_var perf1_tvar = {
	.num_iterations = 0,
	.num_ops = 30000,	/* enqueue operations per run */
	.clk_freq = 0,
	.dropped = pt_dropped,
	.enqueued = pt_enqueued
	/* .dequeued intentionally unset: perf test 1 has no dequeue side */
};

static struct test_config perf_test_config = {
	.ifname = "performance test 1 interface",
	.msg = "performance test 1 : use one PIE configuration,\n"
	"		     measure enqueue performance\n\n",
	.tconfig = &pt_tconfig,
	.tqueue = &pt_tqueue,
	.tvar = &perf1_tvar,
	.tlevel = pt1_tlevel,
};
+
/**
 * Performance test function to measure enqueue performance.
 *
 * Initialises one PIE config/runtime pair, pushes num_ops packets through
 * rte_pie_enqueue() (no dequeue side: pie_out is NULL below) and prints
 * the enqueue statistics together with the rdtsc cycle profile.
 */
static enum test_result perf_test(struct test_config *tcfg)
{
	enum test_result result = PASS;
	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
	uint32_t total = 0;

	printf("%s", tcfg->msg);

	rdtsc_prof_init(&prof, "enqueue");

	if (test_rte_pie_init(tcfg) != PASS) {
		result = FAIL;
		goto out;
	}

	/**
	 * initialize the rte_pie run time data structure
	 */
	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
	/* NOTE(review): first argument is NULL here -- confirm the setter
	 * really ignores its config parameter.
	 */
	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
	*tcfg->tvar->enqueued = 0;
	*tcfg->tvar->dropped = 0;

	/* pie_out == NULL: measure the enqueue path only */
	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
			     tcfg->tqueue->pdata_in,
				 NULL,
			     tcfg->tqueue->qlen,
			     tcfg->tvar->num_ops,
			     tcfg->tvar->enqueued,
			     tcfg->tvar->dropped,
				 tcfg->tvar->dequeued,
			     &prof);

	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;

	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
			total, *tcfg->tvar->enqueued,
			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
			*tcfg->tvar->dropped,
			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);

	rdtsc_prof_print(&prof);
out:
	return result;
}
+
+
+
/**
 * Setup test structures for tests P2
 * performance tests 2
 */
static uint32_t pt2_tlevel[] = {80};	/* fill level (percent) */

static struct test_var perf2_tvar = {
	.num_iterations = 0,
	.num_ops = 30000,	/* enqueue + dequeue operations per run */
	.clk_freq = 0,
	.dropped = pt_dropped,
	.enqueued = pt_enqueued,
	.dequeued = pt_dequeued
};

static struct test_config perf_test_config2 = {
	.ifname = "performance test 2 interface",
	.msg = "performance test 2 : use one PIE configuration,\n"
	"		     measure enqueue & dequeue performance\n\n",
	.tconfig = &pt_tconfig2,
	.tqueue = &pt_tqueue2,
	.tvar = &perf2_tvar,
	.tlevel = pt2_tlevel,
};
+
/**
 * Performance test function to measure enqueue & dequeue performance.
 *
 * Exercises both rte_pie_enqueue() and rte_pie_dequeue() on every
 * iteration and prints the combined statistics and cycle profile.
 */
static enum test_result perf_test2(struct test_config *tcfg)
{
	enum test_result result = PASS;
	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
	uint32_t total = 0;

	printf("%s", tcfg->msg);

	rdtsc_prof_init(&prof, "enqueue");

	if (test_rte_pie_init(tcfg) != PASS) {
		result = FAIL;
		goto out;
	}

	/**
	 * initialize the rte_pie run time data structure
	 */
	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
	/* NOTE(review): first argument is NULL here -- confirm the setter
	 * really ignores its config parameter.
	 */
	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
	*tcfg->tvar->enqueued = 0;
	*tcfg->tvar->dequeued = 0;
	*tcfg->tvar->dropped = 0;

	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
				 tcfg->tqueue->pdata_in,
				 tcfg->tqueue->pdata_out,
				 tcfg->tqueue->qlen,
				 tcfg->tvar->num_ops,
				 tcfg->tvar->enqueued,
				 tcfg->tvar->dropped,
				 tcfg->tvar->dequeued,
				 &prof);

	/* total counts enqueue verdicts; percentages below are relative to it */
	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;

	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
			total, *tcfg->tvar->dequeued,
			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
			*tcfg->tvar->dropped,
			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);

	rdtsc_prof_print(&prof);
out:
	return result;
}
+
/**
 * define the functional tests to be executed fast
 */
struct tests func_pie_tests_quick[] = {
	{ &func_test_config1, func_test1 },
	{ &func_test_config2, func_test2 },
};

/**
 * define the functional and performance tests to be executed
 */
struct tests func_pie_tests[] = {
	{ &func_test_config1, func_test1 },
	{ &func_test_config2, func_test2 },
	{ &func_test_config3, func_test3 },
};

/* performance tests driven by perf_test() / perf_test2() above */
struct tests perf_pie_tests[] = {
	{ &perf_test_config, perf_test },
	{ &perf_test_config2, perf_test2 },
};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
/**
 * Print the aggregated summary line; the fail count is shown only when
 * at least one test failed.
 */
static void
show_stats(const uint32_t num_tests, const uint32_t num_pass)
{
	const uint32_t num_fail = num_tests - num_pass;

	if (num_fail == 0) {
		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
		return;
	}

	printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
	       num_fail);
}
+
/**
 * Map the pass/total counters onto a process exit status:
 * 0 when every test passed, 1 otherwise.
 */
static int
tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
{
	if (num_pass == num_tests)
		return 0;

	return 1;
}
+
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
/* Register the PIE test entry points with the DPDK test framework */
REGISTER_TEST_COMMAND(pie_autotest, test_pie);
REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v12 0/5] Add PIE support for HQoS library
  2021-10-14 12:38                     ` [dpdk-dev] [PATCH v11 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                         ` (4 preceding siblings ...)
  2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-14 15:11                       ` Liguzinski, WojciechX
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                           ` (5 more replies)
  5 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

The DPDK sched library is equipped with a mechanism that protects it from the bufferbloat
problem, a situation in which excess buffering in the network causes high latency and
latency variation. Currently, it supports RED for active queue management (which is
designed to control the queue length, but does not control latency directly and is now
being obsoleted). However, more advanced queue management is required to address this
problem and provide the desired quality of service to users.

This patch set proposes the use of a new algorithm called "PIE" (Proportional Integral
controller Enhanced, RFC 8033) that can effectively and directly control queuing latency
to address the bufferbloat problem.

The implementation of the mentioned functionality includes modifying existing data
structures and adding a new set of data structures to the library, as well as adding
PIE-related APIs. This affects structures in the public API/ABI, which is why a
deprecation notice has been prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/autotest_data.py                    |   18 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |  111 +-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  240 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    3 +
 21 files changed, 2179 insertions(+), 276 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v12 1/5] sched: add PIE based congestion management
  2021-10-14 15:11                       ` [dpdk-dev] [PATCH v12 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-10-14 15:11                         ` Liguzinski, WojciechX
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                           ` (4 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE-based congestion management as described in RFC 8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 240 +++++++++-----
 lib/sched/rte_sched.h                    |  63 +++-
 lib/sched/version.map                    |   3 +
 7 files changed, 701 insertions(+), 96 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
/**
 * PIE configuration parameters passed by user
 *
 */
struct rte_pie_params {
	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
};

/**
 * PIE configuration parameters
 *
 * Internal form of rte_pie_params: the millisecond values are converted
 * to CPU cycles by rte_pie_config_init().
 */
struct rte_pie_config {
	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
};
+
/**
 * PIE run-time data
 */
struct rte_pie {
	uint16_t active;               /**< Flag for activating/deactivating pie */
	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
	uint64_t qlen;                 /**< Queue length (packets count) */
	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
	uint32_t burst_allowance;      /**< Current burst allowance (bytes); NOTE(review): assigned from max_burst which is stored in cpu cycles -- unit and width look inconsistent, confirm */
	uint64_t qdelay_old;           /**< Old queue delay (bytes); NOTE(review): assigned from a cycles-based computation in _calc_drop_probability() -- the "bytes" unit looks wrong, confirm */
	double drop_prob;              /**< Current packet drop probability */
	double accu_prob;              /**< Accumulated packet drop probability */
};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
/**
 * @brief Decides if a new packet should be enqueued or dropped for a non-empty queue
 *
 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
 * @param pie [in,out] data pointer to PIE runtime data
 * @param pkt_len [in] packet length in bytes
 * @param time [in] current time (measured in cpu cycles)
 *
 * @return Operation status
 * @retval 0 enqueue the packet
 * @retval 1 drop the packet based on max threshold criterion
 * @retval 2 drop the packet based on mark probability criterion
 */
static inline int
__rte_experimental
rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
	struct rte_pie *pie,
	uint32_t pkt_len,
	const uint64_t time)
{
	/* Check queue space against the tail drop threshold */
	if (pie->qlen >= pie_cfg->tailq_th) {

		pie->accu_prob = 0;
		return 1;
	}

	if (pie->active) {
		/* Update drop probability after certain interval */
		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
			_calc_drop_probability(pie_cfg, pie, time);

		/* Decide whether packet to be dropped or enqueued;
		 * a non-zero burst allowance suppresses probabilistic drops
		 */
		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
			return 2;
	}

	/* When queue occupancy is over a certain threshold, turn on PIE */
	if ((pie->active == 0) &&
		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
		/* reset the control state and start a new measurement cycle */
		pie->active = 1;
		pie->qdelay_old = 0;
		pie->drop_prob = 0;
		pie->in_measurement = 1;
		pie->departed_bytes_count = 0;
		pie->avg_dq_time = 0;
		pie->last_measurement = time;
		pie->burst_allowance = pie_cfg->max_burst;
		pie->accu_prob = 0;
		pie->start_measurement = time;
	}

	/* when queue has been idle for a while, turn off PIE and Reset counters */
	if (pie->active == 1 &&
		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
		pie->active =  0;
		pie->in_measurement = 0;
	}

	/* Update PIE qlen parameter */
	pie->qlen++;
	pie->qlen_bytes += pkt_len;

	/* No drop */
	return 0;
}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probility criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
/**
 * @brief PIE rate estimation method
 * Called on each packet departure.
 *
 * Maintains an exponentially weighted moving average of the time needed to
 * drain RTE_DQ_THRESHOLD bytes from the queue (avg_dq_time), which
 * _calc_drop_probability() uses to estimate the current queue delay.
 *
 * @param pie [in] data pointer to PIE runtime data
 * @param pkt_len [in] packet length in bytes
 * @param time [in] current time stamp in cpu cycles
 */
static inline void
__rte_experimental
rte_pie_dequeue(struct rte_pie *pie,
	uint32_t pkt_len,
	uint64_t time)
{
	/* Dequeue rate estimation */
	if (pie->in_measurement) {
		pie->departed_bytes_count += pkt_len;

		/* Start a new measurement cycle when enough packets */
		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
			uint64_t dq_time = time - pie->start_measurement;

			/* EWMA with weight RTE_DQ_WEIGHT; seeded on first sample */
			if (pie->avg_dq_time == 0)
				pie->avg_dq_time = dq_time;
			else
				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
					* (1 - RTE_DQ_WEIGHT);

			pie->in_measurement = 0;
		}
	}

	/* Start measurement cycle when enough data in the queue */
	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
		pie->in_measurement = 1;
		pie->start_measurement = time;
		pie->departed_bytes_count = 0;
	}
}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..a066eed186 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,14 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1088,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->cman_params->cman_mode == RTE_SCHED_CMAN_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1262,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,30 +1795,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1822,93 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1917,15 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2021,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2494,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2514,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..07fcf439d8 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer to RFC 8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,12 +197,30 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion Management parameters */
+	struct rte_sched_cman_params *cman_params;
 #endif
 };
 
+#ifdef RTE_SCHED_CMAN
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** WRED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+#endif
+
 struct rte_sched_subport_profile_params {
 	/** Token bucket rate (measured in bytes per second) */
 	uint64_t tb_rate;
@@ -208,10 +249,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +261,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v12 2/5] example/qos_sched: add PIE support
  2021-10-14 15:11                       ` [dpdk-dev] [PATCH v12 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-14 15:11                         ` Liguzinski, WojciechX
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                           ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling PIE or RED by
parsing the config file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   | 111 ++++++++++++++----
 examples/qos_sched/cfg_file.h   |   5 +
 examples/qos_sched/init.c       |  27 +++--
 examples/qos_sched/main.h       |   3 +
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 7 files changed, 242 insertions(+), 102 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..8028479726 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_WRED) {
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_WRED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -275,7 +310,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -290,7 +325,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -306,7 +341,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..94bad349e8 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -204,15 +204,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_WRED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +273,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v12 3/5] example/ip_pipeline: add PIE support
  2021-10-14 15:11                       ` [dpdk-dev] [PATCH v12 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-14 15:11                         ` Liguzinski, WojciechX
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                           ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v12 4/5] doc/guides/prog_guide: added PIE
  2021-10-14 15:11                       ` [dpdk-dev] [PATCH v12 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                           ` (2 preceding siblings ...)
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-14 15:11                         ` Liguzinski, WojciechX
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-10-14 15:33                         ` [dpdk-dev] [PATCH v13 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It uses not only the current latency sample but also analyzes whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+The format of these parameters is as specified by the dropper module API.
+They could be made self-calculated for fine tuning within the apps.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. Calculated drop probability is updated periodically,
+based on the measured and the desired latency, and on whether the queuing latency is currently
+trending up or down. Queuing latency can be obtained using direct measurement or
+on estimations calculated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v12 5/5] app/test: add tests for PIE
  2021-10-14 15:11                       ` [dpdk-dev] [PATCH v12 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                           ` (3 preceding siblings ...)
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-14 15:11                         ` Liguzinski, WojciechX
  2021-10-14 15:33                         ` [dpdk-dev] [PATCH v13 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:11 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/autotest_data.py |   18 +
 app/test/meson.build      |    4 +
 app/test/test_pie.c       | 1065 +++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c       |    6 +-
 lib/sched/rte_pie.h       |   17 +-
 5 files changed, 1103 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 302d6374c1..1d4418b6a3 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -279,6 +279,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie autotest",
+        "Command": "pie_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "PMD ring autotest",
         "Command": "ring_pmd_autotest",
@@ -525,6 +531,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie all",
+        "Command": "pie_all",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
 	"Name":    "Fbarray autotest",
 	"Command": "fbarray_autotest",
@@ -731,6 +743,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie_perf",
+        "Command": "pie_perf",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Lpm6 perf autotest",
         "Command": "lpm6_perf_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index a7611686ad..f224b0c17e 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -111,6 +111,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -241,6 +242,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -292,6 +294,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -305,6 +308,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
/**
 * Test F2: functional test 2
 * Ten PIE configurations, each with its own max-burst and drop-probability
 * update interval; a single fixed fill level (127).
 */
static uint32_t ft2_tlevel[] = {127};
static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
static uint16_t ft2_dp_update_interval[] = {
				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
static struct rte_pie_config ft2_pconfig[10];

/* One config per (max_burst, dp_update_interval) pair above. */
static struct test_rte_pie_config ft2_tconfig =  {
	.pconfig = ft2_pconfig,
	.num_cfg = RTE_DIM(ft2_pconfig),
	.qdelay_ref = 15,
	.dp_update_interval = ft2_dp_update_interval,
	.max_burst = ft2_max_burst,
	.tailq_th = 15,
};

/* Reuses the F1 queue and counters; only the configurations differ. */
static struct test_config func_test_config2 = {
	.ifname = "functional test 2 interface",
	.msg = "functional test 2 : use several PIE configurations,\n"
	"		    compare drop rate to drop probability\n\n",
	.htxt = "PIE config     "
	"avg queue size "
	"enqueued       "
	"dropped        "
	"drop prob %    "
	"drop rate %    "
	"diff %         "
	"tolerance %    "
	"\n",
	.tconfig = &ft2_tconfig,
	.tqueue = &ft_tqueue,
	.tvar = &ft_tvar,
	.tlevel = ft2_tlevel,
};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
/**
 * Default fixture values for the performance tests.
 * pt_rtdata is the enqueue-side PIE state, pt_wtdata the dequeue side
 * (used only by performance test 2).
 */
static struct rte_pie_config pt_wrconfig[1];
static struct rte_pie pt_rtdata[1];
static struct rte_pie pt_wtdata[1];
static uint32_t pt_q[] = {0};
static uint32_t pt_dropped[] = {0};
static uint32_t pt_enqueued[] = {0};
static uint32_t pt_dequeued[] = {0};
static uint16_t pt_max_burst[] = {64};
static uint16_t pt_dp_update_interval[] = {150};

static struct test_rte_pie_config pt_tconfig =  {
	.pconfig = pt_wrconfig,
	.num_cfg = RTE_DIM(pt_wrconfig),
	.qdelay_ref = 15,
	.dp_update_interval = pt_dp_update_interval,
	.max_burst = pt_max_burst,
	.tailq_th = 150,
};

static struct test_queue pt_tqueue = {
	.pdata_in = pt_rtdata,
	.num_queues = RTE_DIM(pt_rtdata),
	.qlen = pt_q,
	.q_ramp_up = 1000000,
	.drop_tolerance = 0,  /* 0 percent */
};

/* Same parameters as pt_tconfig; kept separate so P2 can evolve
 * independently of P1.
 */
static struct test_rte_pie_config pt_tconfig2 =  {
	.pconfig = pt_wrconfig,
	.num_cfg = RTE_DIM(pt_wrconfig),
	.qdelay_ref = 15,
	.dp_update_interval = pt_dp_update_interval,
	.max_burst = pt_max_burst,
	.tailq_th = 150,
};

/* P2 queue: also wires a dequeue-side PIE instance (pdata_out). */
static struct test_queue pt_tqueue2 = {
	.pdata_in = pt_rtdata,
	.pdata_out = pt_wtdata,
	.num_queues = RTE_DIM(pt_rtdata),
	.qlen = pt_q,
	.q_ramp_up = 1000000,
	.drop_tolerance = 0,  /* 0 percent */
};
+
/**
 * Timed enqueue/dequeue loop used by the performance tests,
 * analogous to:
 *  rte_sched_port_enqueue(port, in_mbufs, 10);
 *	rte_sched_port_dequeue(port, out_mbufs, 10);
 *
 * Each rte_pie_enqueue()/rte_pie_dequeue() call is bracketed by the
 * rdtsc profiler so cycle cost is accumulated into *prof.
 *
 * @param pie_cfg [in] PIE configuration (must not be NULL)
 * @param pie_in [in,out] enqueue-side PIE run-time data (must not be NULL)
 * @param pie_out [in,out] dequeue-side PIE data; NULL skips the dequeue leg
 * @param qlen [in] pointer to the current queue length
 * @param num_ops [in] number of enqueue (and optional dequeue) operations
 * @param enqueued [out] incremented per successful enqueue
 * @param dropped [out] incremented per dropped packet
 * @param dequeued [out] incremented per dequeue (only when pie_out != NULL)
 * @param prof [in,out] rdtsc profiler state
 */
static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
				 struct rte_pie *pie_in,
				 struct rte_pie *pie_out,
				 uint32_t *qlen,
				 uint32_t num_ops,
				 uint32_t *enqueued,
				 uint32_t *dropped,
				 uint32_t *dequeued,
				 struct rdtsc_prof *prof)
{
	uint32_t i = 0;

	if (pie_cfg == NULL) {
		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
		return;
	}

	if (pie_in == NULL) {
		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
		return;
	}

	for (i = 0; i < num_ops; i++) {
		uint64_t ts = 0;
		int ret = 0;

		/**
		 * enqueue: fixed 1000*sizeof(uint32_t)-byte packets
		 */
		ts = get_port_ts();
		rdtsc_prof_start(prof);
		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
								1000*sizeof(uint32_t), ts);
		rdtsc_prof_end(prof);

		if (ret == 0)
			(*enqueued)++;
		else
			(*dropped)++;

		/* optional dequeue leg, timed with the same profiler */
		if (pie_out != NULL) {
			ts = get_port_ts();
			rdtsc_prof_start(prof);
			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
			rdtsc_prof_end(prof);

			(*dequeued)++;
		}
	}
}
+
/**
 * Setup test structures for tests P1
 * performance tests 1 (enqueue-only path)
 */
static uint32_t pt1_tlevel[] = {80};

static struct test_var perf1_tvar = {
	.num_iterations = 0,
	.num_ops = 30000,   /* enqueue operations to time */
	.clk_freq = 0,
	.dropped = pt_dropped,
	.enqueued = pt_enqueued
	/* NOTE(review): .dequeued left unset — P1 never dequeues */
};

static struct test_config perf_test_config = {
	.ifname = "performance test 1 interface",
	.msg = "performance test 1 : use one PIE configuration,\n"
	"		     measure enqueue performance\n\n",
	.tconfig = &pt_tconfig,
	.tqueue = &pt_tqueue,
	.tvar = &perf1_tvar,
	.tlevel = pt1_tlevel,
};
+
/**
 * Performance test function to measure enqueue performance.
 *
 * @param tcfg [in] test configuration (perf_test_config)
 * @return PASS unless PIE initialization fails
 */
static enum test_result perf_test(struct test_config *tcfg)
{
	enum test_result result = PASS;
	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
	uint32_t total = 0;

	printf("%s", tcfg->msg);

	rdtsc_prof_init(&prof, "enqueue");

	if (test_rte_pie_init(tcfg) != PASS) {
		result = FAIL;
		goto out;
	}

	/**
	 * initialize the rte_pie run time data structure
	 */
	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
	*tcfg->tvar->enqueued = 0;
	*tcfg->tvar->dropped = 0;

	/* time num_ops enqueues; NULL pie_out skips the dequeue leg */
	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
			     tcfg->tqueue->pdata_in,
				 NULL,
			     tcfg->tqueue->qlen,
			     tcfg->tvar->num_ops,
			     tcfg->tvar->enqueued,
			     tcfg->tvar->dropped,
				 tcfg->tvar->dequeued,
			     &prof);

	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;

	/* NOTE(review): if num_ops were 0, total would be 0 and the
	 * percentages below would divide by zero — confirm num_ops > 0
	 * is guaranteed by the configs.
	 */
	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
			total, *tcfg->tvar->enqueued,
			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
			*tcfg->tvar->dropped,
			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);

	rdtsc_prof_print(&prof);
out:
	return result;
}
+
+
+
/**
 * Setup test structures for tests P2
 * performance tests 2 (enqueue + dequeue path)
 */
static uint32_t pt2_tlevel[] = {80};

static struct test_var perf2_tvar = {
	.num_iterations = 0,
	.num_ops = 30000,
	.clk_freq = 0,
	.dropped = pt_dropped,
	.enqueued = pt_enqueued,
	.dequeued = pt_dequeued   /* P2 also counts dequeues */
};

static struct test_config perf_test_config2 = {
	.ifname = "performance test 2 interface",
	.msg = "performance test 2 : use one PIE configuration,\n"
	"		     measure enqueue & dequeue performance\n\n",
	.tconfig = &pt_tconfig2,
	.tqueue = &pt_tqueue2,
	.tvar = &perf2_tvar,
	.tlevel = pt2_tlevel,
};
+
/**
 * Performance test function to measure enqueue & dequeue performance.
 *
 * @param tcfg [in] test configuration (perf_test_config2)
 * @return PASS unless PIE initialization fails
 */
static enum test_result perf_test2(struct test_config *tcfg)
{
	enum test_result result = PASS;
	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
	uint32_t total = 0;

	printf("%s", tcfg->msg);

	rdtsc_prof_init(&prof, "enqueue");

	if (test_rte_pie_init(tcfg) != PASS) {
		result = FAIL;
		goto out;
	}

	/**
	 * initialize the rte_pie run time data structure
	 */
	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
	*tcfg->tvar->enqueued = 0;
	*tcfg->tvar->dequeued = 0;
	*tcfg->tvar->dropped = 0;

	/* time num_ops enqueue+dequeue pairs via pdata_out */
	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
				 tcfg->tqueue->pdata_in,
				 tcfg->tqueue->pdata_out,
				 tcfg->tqueue->qlen,
				 tcfg->tvar->num_ops,
				 tcfg->tvar->enqueued,
				 tcfg->tvar->dropped,
				 tcfg->tvar->dequeued,
				 &prof);

	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;

	/* NOTE(review): dequeued is incremented every iteration, so the
	 * "dequeued" percentage is relative to enqueued+dropped and can
	 * exceed 100% — confirm this is the intended report.
	 */
	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
			total, *tcfg->tvar->dequeued,
			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
			*tcfg->tvar->dropped,
			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);

	rdtsc_prof_print(&prof);
out:
	return result;
}
+
/**
 * define the functional tests to be executed fast
 * (func_test3 is intentionally excluded from the quick list)
 */
struct tests func_pie_tests_quick[] = {
	{ &func_test_config1, func_test1 },
	{ &func_test_config2, func_test2 },
};

/**
 * define the functional and performance tests to be executed
 */
struct tests func_pie_tests[] = {
	{ &func_test_config1, func_test1 },
	{ &func_test_config2, func_test2 },
	{ &func_test_config3, func_test3 },
};

struct tests perf_pie_tests[] = {
	{ &perf_test_config, perf_test },
	{ &perf_test_config2, perf_test2 },
};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
/**
 * Print the run summary; the "fail" field is shown only when at least
 * one test failed.
 */
static void
show_stats(const uint32_t num_tests, const uint32_t num_pass)
{
	uint32_t num_fail = num_tests - num_pass;

	if (num_fail == 0) {
		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
	} else {
		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
		       num_fail);
	}
}
+
/**
 * Map the run summary to a process-style exit code:
 * 0 when every test passed, 1 otherwise.
 */
static int
tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
{
	if (num_pass == num_tests)
		return 0;
	return 1;
}
+
/**
 * Entry point for 'pie_autotest': parameter-validation checks plus the
 * quick functional test list.
 *
 * @return 0 on full pass, -1 on invalid-parameter check failure,
 *         non-zero when any quick test fails
 */
static int
test_pie(void)
{
	uint32_t num_tests = 0;
	uint32_t num_pass = 0;

	if (test_invalid_parameters() < 0)
		return -1;

	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
		  &num_tests, &num_pass);
	show_stats(num_tests, num_pass);
	return tell_the_result(num_tests, num_pass);
}
+
/**
 * Entry point for 'pie_perf': runs only the performance tests
 * (no invalid-parameter checks).
 *
 * @return 0 on full pass, non-zero otherwise
 */
static int
test_pie_perf(void)
{
	uint32_t num_tests = 0;
	uint32_t num_pass = 0;

	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
	show_stats(num_tests, num_pass);
	return tell_the_result(num_tests, num_pass);
}
+
/**
 * Entry point for 'pie_all': parameter checks, the full functional
 * list (including func_test3) and the performance tests.
 *
 * @return 0 on full pass, -1 on invalid-parameter check failure,
 *         non-zero when any test fails
 */
static int
test_pie_all(void)
{
	uint32_t num_tests = 0;
	uint32_t num_pass = 0;

	if (test_invalid_parameters() < 0)
		return -1;

	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
	show_stats(num_tests, num_pass);
	return tell_the_result(num_tests, num_pass);
}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management
  2021-09-07 19:14             ` Stephen Hemminger
  2021-09-08  8:49               ` Liguzinski, WojciechX
@ 2021-10-14 15:13               ` Liguzinski, WojciechX
  1 sibling, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:13 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Singh, Jasvinder, Dumitrescu, Cristian, Ajmera, Megha

Hi Stephen,

I have applied as many of your comments as possible.
Unfortunately, not all calculations could be changed to improve performance.

Thanks,
Wojtek

-----Original Message-----
From: Stephen Hemminger <stephen@networkplumber.org> 
Sent: Tuesday, September 7, 2021 9:15 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
Cc: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Ajmera, Megha <megha.ajmera@intel.com>
Subject: Re: [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management

On Tue,  7 Sep 2021 07:33:24 +0000
"Liguzinski, WojciechX" <wojciechx.liguzinski@intel.com> wrote:

> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param time [in] current time (measured in cpu cycles)  */ static 
> +inline void __rte_experimental _calc_drop_probability(const struct 
> +rte_pie_config *pie_cfg,
> +	struct rte_pie *pie, uint64_t time)

This code adds a lot of inline functions in the name of performance.
But every inline like this means the internal ABI for the implementation has to be exposed.

You would probably get a bigger performance bump from not using floating point in the internal math, than the minor performance optimization from having so many inlines.

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v13 0/5] Add PIE support for HQoS library
  2021-10-14 15:11                       ` [dpdk-dev] [PATCH v12 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                           ` (4 preceding siblings ...)
  2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-14 15:33                         ` Liguzinski, WojciechX
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                             ` (5 more replies)
  5 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency 
variation. Currently, it supports RED for active queue management (which is designed 
to control the queue length but it does not control latency directly and is now being 
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address 
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and 
adding a new set of data structures to the library, adding PIE related APIs. 
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/autotest_data.py                    |   18 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |  111 +-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  240 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    3 +
 21 files changed, 2179 insertions(+), 276 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v13 1/5] sched: add PIE based congestion management
  2021-10-14 15:33                         ` [dpdk-dev] [PATCH v13 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-10-14 15:33                           ` Liguzinski, WojciechX
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                             ` (4 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 240 +++++++++-----
 lib/sched/rte_sched.h                    |  63 +++-
 lib/sched/version.map                    |   3 +
 7 files changed, 701 insertions(+), 96 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data (per queue)
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (derived from avg_dq_time, so cpu cycles — not bytes; TODO confirm intended unit) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is an integer, not a pointer: the old assert compared it
+	 * against NULL. Assert the pointer argument and a non-zero length.
+	 */
+	RTE_ASSERT(pie != NULL);
+	RTE_ASSERT(pkt_len != 0);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Update the PIE drop probability from the current queue delay
+ *        estimate (RFC 8033 section 4.2); also decays the burst allowance
+ *        and records the measurement time.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	/* Auto-tune: scale the adjustment down when the current drop
+	 * probability is small (RFC 8033 section 5.5).
+	 */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	/* Cap the per-interval increase once probability is large */
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Decay the burst allowance, clamping at zero. The previous
+	 * unsigned subtraction wrapped around when the allowance was
+	 * smaller than the update interval, so the "> 0" clamp never
+	 * triggered and a huge value was written back.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Draw a uniform random number in [0, 1). The previous code used
+	 * integer division (rte_rand()/RTE_RAND_MAX), which is almost
+	 * always 0, making the comparison against drop_prob meaningless.
+	 */
+	double rand_value = (double)rte_rand() / (double)RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		/* NOTE: a positive drop verdict is suppressed while the
+		 * burst allowance has not been consumed yet.
+		 */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	/* Activation resets all estimator state and grants a fresh burst
+	 * allowance of max_burst cycles.
+	 */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* Dispatch on queue occupancy: an empty queue never drops */
+	if (qlen == 0)
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+
+	return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+}
+
+/**
+ * @brief PIE dequeue-rate estimation, called on every packet departure.
+ *
+ * Accumulates departed bytes while a measurement cycle is running and,
+ * once RTE_DQ_THRESHOLD bytes have left the queue, folds the elapsed
+ * time into the exponentially weighted average dequeue time. A new
+ * cycle is armed whenever enough data is queued and none is running.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Close the cycle once enough bytes have departed */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			const uint64_t cycle_time = time - pie->start_measurement;
+
+			pie->avg_dq_time = (pie->avg_dq_time == 0) ?
+				cycle_time :
+				cycle_time * RTE_DQ_WEIGHT +
+					pie->avg_dq_time * (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Arm a new measurement cycle when enough data is in the queue */
+	if (pie->in_measurement == 0 && pie->qlen_bytes >= RTE_DQ_THRESHOLD) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..a066eed186 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,14 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1088,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+
+/* Initialise the per-traffic-class, per-color WRED configuration of a
+ * subport from the user-supplied parameters. Frees the whole port's
+ * subport memory and returns -EINVAL on any invalid RED parameter set;
+ * on success marks the subport's congestion management mode as WRED.
+ */
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				/* release everything allocated for the port so far */
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_WRED;
+	return 0;
+}
+
+/* Initialise the per-traffic-class PIE configuration of a subport from
+ * the user-supplied parameters. On any error the port's subport memory
+ * is freed (matching rte_sched_red_config and the init-failure path
+ * below) and -EINVAL is returned; on success the subport's congestion
+ * management mode is set to PIE.
+ */
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		/* Tail-drop threshold must not exceed the queue size.
+		 * Free the port memory here too: the previous version
+		 * leaked it on this path only.
+		 */
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+/* Dispatch congestion-management initialisation of a subport to the
+ * configured scheme (WRED or PIE); unknown modes yield -EINVAL.
+ */
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	switch (params->cman_params->cman_mode) {
+	case RTE_SCHED_CMAN_WRED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_CMAN_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1262,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,30 +1795,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1822,93 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1917,15 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2021,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2494,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2514,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..07fcf439d8 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,12 +197,30 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion Management parameters */
+	struct rte_sched_cman_params *cman_params;
 #endif
 };
 
+#ifdef RTE_SCHED_CMAN
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** WRED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+#endif
+
 struct rte_sched_subport_profile_params {
 	/** Token bucket rate (measured in bytes per second) */
 	uint64_t tb_rate;
@@ -208,10 +249,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +261,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index ace284b7de..3422821ac8 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v13 2/5] example/qos_sched: add PIE support
  2021-10-14 15:33                         ` [dpdk-dev] [PATCH v13 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-14 15:33                           ` Liguzinski, WojciechX
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                             ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling either PIE or RED congestion
management, selected by parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   | 111 ++++++++++++++----
 examples/qos_sched/cfg_file.h   |   5 +
 examples/qos_sched/init.c       |  27 +++--
 examples/qos_sched/main.h       |   3 +
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 7 files changed, 242 insertions(+), 102 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..ea8b078566 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+/* Copy congestion-management parameters (WRED or PIE, per traffic class)
+ * from a local cman_p into the subport parameter structure.
+ *
+ * NOTE(review): this dereferences subport_p->cman_params without a
+ * visible NULL check or allocation here — presumably the caller has
+ * already pointed it at valid storage; confirm against init.c.
+ */
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_WRED) {
+			/* WRED: one parameter set per packet color */
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			/* PIE: a single parameter set per traffic class */
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_WRED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -275,7 +310,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -290,7 +325,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -306,7 +341,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..94bad349e8 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -204,15 +204,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_WRED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +273,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v13 3/5] example/ip_pipeline: add PIE support
  2021-10-14 15:33                         ` [dpdk-dev] [PATCH v13 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-14 15:33                           ` Liguzinski, WojciechX
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                             ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v13 4/5] doc/guides/prog_guide: added PIE
  2021-10-14 15:33                         ` [dpdk-dev] [PATCH v13 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                             ` (2 preceding siblings ...)
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-14 15:33                           ` Liguzinski, WojciechX
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-10-15  8:16                           ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on latency samples.
+It considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+The format of these parameters as specified to the dropper module API.
+They could made self calculated for fine tuning, within the apps.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED) and Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and desired latency and on whether the queuing latency is
+currently trending up or down. Queuing latency can be obtained by direct measurement
+or estimated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v13 5/5] app/test: add tests for PIE
  2021-10-14 15:33                         ` [dpdk-dev] [PATCH v13 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                             ` (3 preceding siblings ...)
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-14 15:33                           ` Liguzinski, WojciechX
  2021-10-15  8:16                           ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 15:33 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/autotest_data.py |   18 +
 app/test/meson.build      |    4 +
 app/test/test_pie.c       | 1065 +++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c       |    6 +-
 lib/sched/rte_pie.h       |   17 +-
 5 files changed, 1103 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 302d6374c1..1d4418b6a3 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -279,6 +279,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie autotest",
+        "Command": "pie_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "PMD ring autotest",
         "Command": "ring_pmd_autotest",
@@ -525,6 +531,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie all",
+        "Command": "red_all",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
 	"Name":    "Fbarray autotest",
 	"Command": "fbarray_autotest",
@@ -731,6 +743,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Pie_perf",
+        "Command": "pie_perf",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Lpm6 perf autotest",
         "Command": "lpm6_perf_autotest",
diff --git a/app/test/meson.build b/app/test/meson.build
index a7611686ad..f224b0c17e 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -111,6 +111,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -241,6 +242,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -292,6 +294,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -305,6 +308,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/** Structures for testing rte_pie performance and functionality. */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;      /**< RTE_PIE runtime data output */
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size (shared counter) */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued (perf tests
+					  * only; NULL for functional tests)
+					  */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels to exercise, one per step */
+};
+
+/** Verdict returned by every test function. */
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/** Pairs a test configuration with the function that runs it. */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+/** Simple TSC-based profiling accumulator for one measured call site. */
+struct rdtsc_prof {
+	uint64_t clk_start;             /**< timestamp of current section start */
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;               /**< label used when printing stats */
+};
+
+/* 10 Gbit/s port speed expressed in bytes per second. */
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+/* Reciprocal of CPU cycles per byte; computed by init_port_ts(). */
+static double inv_cycles_per_byte;
+
+/* Cache the reciprocal of cycles-per-byte so get_port_ts() scales with a
+ * single multiply instead of a divide.
+ */
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cpb = (double)(cpu_clock) / (double)(port_speed_bytes);
+
+	inv_cycles_per_byte = 1.0 / cpb;
+}
+
+/* Convert the current TSC reading into a byte-time on the simulated port. */
+static uint64_t get_port_ts(void)
+{
+	double scaled = (double)rte_rdtsc() * inv_cycles_per_byte;
+
+	return (uint64_t)scaled;
+}
+
+/* Reset all statistics of a profiling context and attach a label to it. */
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->name = name;
+	p->clk_avgc = 0;
+	p->clk_avg = 0;
+	p->clk_max = 0;
+	p->clk_min = (uint64_t)(-1LL);
+}
+
+/* Record the starting timestamp of a measured section. */
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+/* Close a measured section: fold the elapsed cycle count into the
+ * min/max/average statistics. (Local renamed from the misleading
+ * "clk_start": it holds a delta, not a timestamp.)
+ */
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t delta = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) delta;
+
+	if (delta > p->clk_max)
+		p->clk_max = delta;
+	if (delta < p->clk_min)
+		p->clk_min = delta;
+}
+
+/* Print the collected cycle statistics, if any samples were taken. */
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc == 0)
+		return;
+
+	printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+					",max=%" PRIu64 ", avg=%.1f\n",
+		p->name,
+		p->clk_avgc,
+		p->clk_min,
+		p->clk_max,
+		(p->clk_avg / ((double) p->clk_avgc)));
+}
+
+/* Read the PIE active/deactivated flag; the config parameter is unused. */
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+/* Set the PIE active/deactivated flag; the config parameter is unused. */
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the current packet drop probability; the config parameter is unused.
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+/* Read the average dequeue time (the original comment wrongly said
+ * "drop probability"); the config parameter is unused.
+ */
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+/**
+ * Compute the observed drop rate: dropped / (enqueued + dropped).
+ * Returns 0.0 when no packet has been processed yet, avoiding a 0/0
+ * division the original code performed.
+ */
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	uint32_t total = enqueued + dropped;
+
+	if (total == 0)
+		return 0.0;
+
+	return (double)dropped / (double)total;
+}
+
+/**
+ * Check whether the measured drop rate matches the drop probability within
+ * @tolerance percent. On return *diff holds the relative difference (%).
+ *
+ * The original test compared the integer part of the absolute difference
+ * with zero; since both values are probabilities in [0, 1] this was always
+ * true and the check could never fail. Compare against a small epsilon
+ * instead, and handle the drop_prob == 0 case to avoid dividing by zero.
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if (abs_diff < 1e-9) {
+		*diff = 0.0;
+	} else if (drop_prob == 0.0) {
+		/* Non-zero measured rate against zero expected probability. */
+		*diff = 100.0;
+		ret = 0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * Initialise every PIE configuration under test and zero the shared
+ * queue-length and statistics counters.
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int n;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (n = 0; n < tcfg->tconfig->num_cfg; n++) {
+		int rc = rte_pie_config_init(&tcfg->tconfig->pconfig[n],
+				(uint16_t)tcfg->tconfig->qdelay_ref,
+				(uint16_t)tcfg->tconfig->dp_update_interval[n],
+				(uint16_t)tcfg->tconfig->max_burst[n],
+				(uint16_t)tcfg->tconfig->tailq_th);
+
+		if (rc != 0)
+			return FAIL;
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * Try to enqueue one packet of @pkt_len bytes, giving up after @attempts
+ * tries.
+ *
+ * @return 0 on the first successful enqueue, -1 if every attempt was dropped
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t try;
+
+	for (try = 0; try < attempts; try++) {
+		/* stop as soon as one packet makes it into the queue */
+		if (rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len,
+					get_port_ts()) == 0)
+			return 0;
+	}
+
+	return -1;
+}
+
+/**
+ * Functional helper: enqueue @num_ops packets and count how many were
+ * accepted versus dropped.
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t op;
+
+	for (op = 0; op < num_ops; op++) {
+		int rc = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+
+		if (rc == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * Default values for the functional test structures.
+ */
+static struct rte_pie_config ft_wpconfig[1];     /* single PIE config under test */
+static struct rte_pie ft_rtdata[1];              /* runtime data for one queue */
+static uint32_t  ft_q[] = {0};                   /* shared queue length counter */
+static uint32_t  ft_dropped[] = {0};             /* dropped-packet counter */
+static uint32_t  ft_enqueued[] = {0};            /* enqueued-packet counter */
+static uint16_t ft_max_burst[] = {64};           /* max burst allowance (ms) */
+static uint16_t ft_dp_update_interval[] = {150}; /* drop-prob update period (ms) */
+
+/* One PIE configuration shared by functional tests 1 and 3. */
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+/* Queue description: starts empty (ft_q == 0). */
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+/* Shared counters; .dequeued is intentionally left NULL (functional only). */
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1.
+ * Queue levels stepped through by the test loop (packet counts).
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+/**
+ * Functional test 1: single PIE configuration, queue kept at a low level.
+ * Neither the drop probability nor the measured drop rate should become
+ * non-zero.
+ *
+ * Fix: the test has exactly one configuration (num_cfg == 1), but the
+ * original code indexed pconfig[] with the 24-entry level loop counter,
+ * reading past the end of the array. Index 0 is used instead.
+ */
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/* reset rte_pie run-time data */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/* always use config 0: this test has a single configuration */
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2.
+ * Ten configurations are paired index-for-index with ten burst sizes and
+ * ten update intervals; the queue is ramped to a single high level (127).
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+/**
+ * Functional test 2: run each of the PIE configurations in turn against a
+ * queue held at a high level and compare the measured drop rate with the
+ * drop probability reported by the library.
+ */
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		/* explicit truncation: avg dequeue time is reported as double */
+		avg = (uint32_t)rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+/* Test F3 data: unlike F1, the queue starts at a non-zero length (100). */
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3.
+ * Fix: the display message said "functional test 2" (copy-paste error).
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 3 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+/**
+ * Functional test 3: like test 1 but the queue starts at a non-zero length.
+ *
+ * Fixes: the loop iterated RTE_DIM(ft_tlevels) (24) times although this
+ * test defines only RTE_DIM(ft3_tlevels) (3) levels, and pconfig[] was
+ * indexed with that loop counter although num_cfg == 1 (out-of-bounds
+ * read). The single configuration at index 0 is used throughout.
+ */
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/* reset rte_pie run-time data */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft3_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/* always use config 0: this test has a single configuration */
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Default values for the performance test structures.
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];      /* enqueue-side runtime data */
+static struct rte_pie pt_wtdata[1];      /* dequeue-side runtime data */
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/* Second set: identical config, but the queue also has a dequeue side. */
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * Performance helper: enqueue (and optionally dequeue) @num_ops packets,
+ * profiling each rte_pie_enqueue()/rte_pie_dequeue() call. Mirrors
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t op;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (op = 0; op < num_ops; op++) {
+		uint64_t ts;
+		int rc;
+
+		/* enqueue, timing only the rte_pie_enqueue() call */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		rc = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (rc == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		/* the dequeue leg is optional (enqueue-only perf test) */
+		if (pie_out == NULL)
+			continue;
+
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		(*dequeued)++;
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+/* NOTE: .dequeued is intentionally NULL; perf test 1 has no dequeue leg. */
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ * Fix: guard the percentage computation against a zero total, which would
+ * otherwise divide by zero if no packet was processed.
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/* initialize the rte_pie run time data structure */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* tvar->dequeued may be NULL here; it is never dereferenced because
+	 * the dequeue side (pie_out) is NULL for this test.
+	 */
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+			     NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+			     tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	if (total > 0)
+		printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+				total, *tcfg->tvar->enqueued,
+				((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+				*tcfg->tvar->dropped,
+				((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2 (enqueue + dequeue)
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ * Fix: guard the percentage computation against a zero total, which would
+ * otherwise divide by zero if no packet was processed.
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/* initialize the rte_pie run time data structure */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	if (total > 0)
+		printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+				total, *tcfg->tvar->dequeued,
+				((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+				*tcfg->tvar->dropped,
+				((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast (pie_autotest)
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the full functional test set (pie_all)
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+/** performance test set (pie_perf / pie_all) */
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * Execute @test_count tests from @test_type, updating the run/pass counters.
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+	uint32_t t;
+
+	for (t = 0; t < test_count; t++) {
+		enum test_result result;
+
+		printf("\n%s\n", bar_str);
+		result = test_type[t].testfn(test_type[t].testcfg);
+		(*num_tests)++;
+
+		if (result == PASS) {
+			(*num_pass)++;
+			printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ *
+ * Fix: every failure message now uses the "%i: %s" format; four call sites
+ * used "%i%s", running the line number and message together.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL runtime data */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+/* Print a summary line; include the fail count only when something failed. */
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	const uint32_t num_fail = num_tests - num_pass;
+
+	if (num_fail == 0)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_fail);
+}
+
+/* Map the pass counter onto an exit code: 0 = all tests passed, 1 = not. */
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	return (num_pass == num_tests) ? 0 : 1;
+}
+
+/* Entry point for "pie_autotest": parameter checks + quick functional set. */
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* Entry point for "pie_perf": performance tests only. */
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* Entry point for "pie_all": parameter checks + all functional + perf tests. */
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v9 1/5] sched: add PIE based congestion management
  2021-10-12 18:34                       ` Liguzinski, WojciechX
@ 2021-10-14 16:02                         ` Liguzinski, WojciechX
  0 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-14 16:02 UTC (permalink / raw)
  To: Dumitrescu, Cristian, dev, Singh, Jasvinder; +Cc: Ajmera, Megha, Zegota, AnnaX

Hi Cristian,

-----Original Message-----
From: Liguzinski, WojciechX 
Sent: Tuesday, October 12, 2021 8:34 PM
To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>
Cc: Ajmera, Megha <megha.ajmera@intel.com>; Cybura, LukaszX <LukaszX.Cybura@intel.com>; Zegota, AnnaX <annax.zegota@intel.com>
Subject: RE: [PATCH v9 1/5] sched: add PIE based congestion management

Hi Cristian,

-----Original Message-----
From: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
Sent: Tuesday, October 12, 2021 6:00 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>
Cc: Ajmera, Megha <megha.ajmera@intel.com>
Subject: RE: [PATCH v9 1/5] sched: add PIE based congestion management

Hi Wojciech,

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 11, 2021 8:56 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; 
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v9 1/5] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 228 +++++++++----
>  lib/sched/rte_sched.h                    |  53 ++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 685 insertions(+), 90 deletions(-)  create mode
> 100644 lib/sched/rte_pie.c  create mode 100644 lib/sched/rte_pie.h
> 
> diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c
> b/drivers/net/softnic/rte_eth_softnic_tm.c
> index 90baba15ce..5b6c4e6d4b 100644
> --- a/drivers/net/softnic/rte_eth_softnic_tm.c
> +++ b/drivers/net/softnic/rte_eth_softnic_tm.c
> @@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
>  	return 0;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
>  #define WRED_SUPPORTED						1
>  #else
>  #define WRED_SUPPORTED						0
> @@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, 
> uint32_t tc_id)
>  	return NULL;
>  }
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static void
>  wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id) @@
> -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t
> subport_id)
>  	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE;
> tc_id++)
>  		for (color = RTE_COLOR_GREEN; color < RTE_COLORS;
> color++) {
>  			struct rte_red_params *dst =
> -				&pp->red_params[tc_id][color];
> +				&pp->wred_params[tc_id][color];
>  			struct tm_wred_profile *src_wp =
>  				tm_tc_wred_profile_get(dev, tc_id);
>  			struct rte_tm_red_params *src =
> diff --git a/lib/sched/meson.build b/lib/sched/meson.build index
> b24f7b8775..e7ae9bcf19 100644
> --- a/lib/sched/meson.build
> +++ b/lib/sched/meson.build
> @@ -1,11 +1,7 @@
>  # SPDX-License-Identifier: BSD-3-Clause  # Copyright(c) 2017 Intel 
> Corporation
> 
> -sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c') -headers 
> = files(
> -        'rte_approx.h',
> -        'rte_red.h',
> -        'rte_sched.h',
> -        'rte_sched_common.h',
> -)
> +sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c',
> +'rte_pie.c') headers = files('rte_sched.h', 'rte_sched_common.h',
> +		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
>  deps += ['mbuf', 'meter']
> diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c new file mode
> 100644 index 0000000000..2fcecb2db4
> --- /dev/null
> +++ b/lib/sched/rte_pie.c
> @@ -0,0 +1,82 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#include <stdlib.h>
> +
> +#include "rte_pie.h"
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_malloc.h>
> +
> +#ifdef __INTEL_COMPILER
> +#pragma warning(disable:2259) /* conversion may lose significant bits 
> +*/ #endif
> +
> +void
> +rte_pie_rt_data_init(struct rte_pie *pie) {
> +	if (pie == NULL) {
> +		/* Allocate memory to use the PIE data structure */
> +		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
> +
> +		if (pie == NULL)
> +			RTE_LOG(ERR, SCHED, "%s: Memory allocation
> fails\n", __func__);
> +	}
> +
> +	pie->active = 0;
> +	pie->in_measurement = 0;
> +	pie->departed_bytes_count = 0;
> +	pie->start_measurement = 0;
> +	pie->last_measurement = 0;
> +	pie->qlen = 0;
> +	pie->avg_dq_time = 0;
> +	pie->burst_allowance = 0;
> +	pie->qdelay_old = 0;
> +	pie->drop_prob = 0;
> +	pie->accu_prob = 0;
> +}
> +
> +int
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th)
> +{
> +	uint64_t tsc_hz = rte_get_tsc_hz();
> +
> +	if (pie_cfg == NULL)
> +		return -1;
> +
> +	if (qdelay_ref <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for qdelay_ref\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (dp_update_interval <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for dp_update_interval\n",
> __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (max_burst <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for max_burst\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	if (tailq_th <= 0) {
> +		RTE_LOG(ERR, SCHED,
> +			"%s: Incorrect value for tailq_th\n", __func__);
> +		return -EINVAL;
> +	}
> +
> +	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
> +	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) /
> 1000;
> +	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
> +	pie_cfg->tailq_th = tailq_th;
> +
> +	return 0;
> +}
> diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h new file mode
> 100644 index 0000000000..f83c95664f
> --- /dev/null
> +++ b/lib/sched/rte_pie.h
> @@ -0,0 +1,393 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation  */
> +
> +#ifndef __RTE_PIE_H_INCLUDED__
> +#define __RTE_PIE_H_INCLUDED__
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * @file
> + * RTE Proportional Integral controller Enhanced (PIE)
> + *
> + *
> + ***/
> +
> +#include <stdint.h>
> +
> +#include <rte_random.h>
> +#include <rte_debug.h>
> +
> +#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
> +				     * to start measurement cycle (bytes)
> +				     */
> +#define RTE_DQ_WEIGHT      0.25    /**< Weight
> (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
> +#define RTE_ALPHA          0.125   /**< Weights in drop probability
> calculations */
> +#define RTE_BETA           1.25    /**< Weights in drop probability calculations
> */
> +#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number
> */
> +
> +
> +/**
> + * PIE configuration parameters passed by user
> + *
> + */
> +struct rte_pie_params {
> +	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
> +	uint16_t dp_update_interval;   /**< Update interval for drop
> probability (milliseconds) */
> +	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * PIE configuration parameters
> + *
> + */
> +struct rte_pie_config {
> +	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
> +	uint64_t dp_update_interval;   /**< Update interval for drop
> probability (in CPU cycles) */
> +	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.)
> */
> +	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
> +};
> +
> +/**
> + * RED run-time data
> + */
> +struct rte_pie {
> +	uint16_t active;               /**< Flag for activating/deactivating pie */
> +	uint16_t in_measurement;       /**< Flag for activation of
> measurement cycle */
> +	uint32_t departed_bytes_count; /**< Number of bytes departed in
> current measurement cycle */
> +	uint64_t start_measurement;    /**< Time to start to measurement
> cycle (in cpu cycles) */
> +	uint64_t last_measurement;     /**< Time of last measurement (in
> cpu cycles) */
> +	uint64_t qlen;                 /**< Queue length (packets count) */
> +	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
> +	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in
> cpu cycles) */
> +	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
> +	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
> +	double drop_prob;              /**< Current packet drop probability */
> +	double accu_prob;              /**< Accumulated packet drop probability
> */
> +};
> +
> +/**
> + * @brief Initialises run-time data
> + *
> + * @param pie [in,out] data pointer to PIE runtime data  */ void 
> +__rte_experimental rte_pie_rt_data_init(struct rte_pie *pie);
> +
> +/**
> + * @brief Configures a single PIE configuration parameter structure.
> + *
> + * @param pie_cfg [in,out] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param qdelay_ref [in]  latency target(milliseconds)
> + * @param dp_update_interval [in] update interval for drop 
> + probability
> (milliseconds)
> + * @param max_burst [in] maximum burst allowance (milliseconds)
> + * @param tailq_th [in] tail drop threshold for the queue (number of
> packets)
> + *
> + * @return Operation status
> + * @retval 0 success
> + * @retval !0 error
> + */
> +int
> +__rte_experimental
> +rte_pie_config_init(struct rte_pie_config *pie_cfg,
> +	const uint16_t qdelay_ref,
> +	const uint16_t dp_update_interval,
> +	const uint16_t max_burst,
> +	const uint16_t tailq_th);
> +
> +/**
> + * @brief Decides packet enqueue when queue is empty
> + *
> + * Note: packet is never dropped in this particular case.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval !0 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len)
> +{
> +	RTE_ASSERT(pkt_len != NULL);
> +
> +	/* Update the PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/**
> +	 * If the queue has been idle for a while, turn off PIE and Reset
> counters
> +	 */
> +	if ((pie->active == 1) &&
> +		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + * @param time [in] current time (measured in cpu cycles)  */ static 
> +inline void __rte_experimental _calc_drop_probability(const struct 
> +rte_pie_config *pie_cfg,
> +	struct rte_pie *pie, uint64_t time)
> +{
> +	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
> +
> +	/* Note: can be implemented using integer multiply.
> +	 * DQ_THRESHOLD is power of 2 value.
> +	 */
> +	double current_qdelay = pie->qlen * (pie->avg_dq_time /
> RTE_DQ_THRESHOLD);
> +
> +	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
> +		RTE_BETA * (current_qdelay - pie->qdelay_old);
> +
> +	if (pie->drop_prob < 0.000001)
> +		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
> +	else if (pie->drop_prob < 0.00001)
> +		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
> +	else if (pie->drop_prob < 0.0001)
> +		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
> +	else if (pie->drop_prob < 0.001)
> +		p = p * 0.03125;                    /* (1/32) = 0.03125   */
> +	else if (pie->drop_prob < 0.01)
> +		p = p * 0.125;                      /* (1/8) = 0.125    */
> +	else if (pie->drop_prob < 0.1)
> +		p = p * 0.5;                        /* (1/2) = 0.5    */
> +
> +	if (pie->drop_prob >= 0.1 && p > 0.02)
> +		p = 0.02;
> +
> +	pie->drop_prob += p;
> +
> +	double qdelay = qdelay_ref * 0.5;
> +
> +	/*  Exponentially decay drop prob when congestion goes away  */
> +	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
> +		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
> +
> +	/* Bound drop probability */
> +	if (pie->drop_prob < 0)
> +		pie->drop_prob = 0;
> +	if (pie->drop_prob > 1)
> +		pie->drop_prob = 1;
> +
> +	pie->qdelay_old = current_qdelay;
> +	pie->last_measurement = time;
> +
> +	uint64_t burst_allowance = pie->burst_allowance - pie_cfg-
> >dp_update_interval;
> +
> +	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0; 
> +}
> +
> +/**
> + * @brief make a decision to drop or enqueue a packet based on probability
> + *        criteria
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> +parameter
> structure
> + * @param pie [in, out] data pointer to PIE runtime data
> + *
> + * @return operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet
> + */
> +static inline int
> +__rte_experimental
> +_rte_pie_drop(const struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie)
> +{
> +	uint64_t rand_value;
> +	double qdelay = pie_cfg->qdelay_ref * 0.5;
> +
> +	/* PIE is active but the queue is not congested: return 0 */
> +	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
> +		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
> +		return 0;
> +
> +	if (pie->drop_prob == 0)
> +		pie->accu_prob = 0;
> +
> +	/* For practical reasons, drop probability can be further scaled
> according
> +	 * to packet size, but one needs to set a bound to avoid unnecessary
> bias
> +	 * Random drop
> +	 */
> +	pie->accu_prob += pie->drop_prob;
> +
> +	if (pie->accu_prob < 0.85)
> +		return 0;
> +
> +	if (pie->accu_prob >= 8.5)
> +		return 1;
> +
> +	rand_value = rte_rand()/RTE_RAND_MAX;
> +
> +	if ((double)rand_value < pie->drop_prob) {
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqueued or dropped for non-
> empty queue
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> + parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on max threshold criterion
> + * @retval 2 drop the packet based on mark probability criterion  */ 
> +static inline int __rte_experimental rte_pie_enqueue_nonempty(const 
> +struct rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	/* Check queue space against the tail drop threshold */
> +	if (pie->qlen >= pie_cfg->tailq_th) {
> +
> +		pie->accu_prob = 0;
> +		return 1;
> +	}
> +
> +	if (pie->active) {
> +		/* Update drop probability after certain interval */
> +		if ((time - pie->last_measurement) >= pie_cfg-
> >dp_update_interval)
> +			_calc_drop_probability(pie_cfg, pie, time);
> +
> +		/* Decide whether packet to be dropped or enqueued */
> +		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance ==
> 0)
> +			return 2;
> +	}
> +
> +	/* When queue occupancy is over a certain threshold, turn on PIE */
> +	if ((pie->active == 0) &&
> +		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
> +		pie->active = 1;
> +		pie->qdelay_old = 0;
> +		pie->drop_prob = 0;
> +		pie->in_measurement = 1;
> +		pie->departed_bytes_count = 0;
> +		pie->avg_dq_time = 0;
> +		pie->last_measurement = time;
> +		pie->burst_allowance = pie_cfg->max_burst;
> +		pie->accu_prob = 0;
> +		pie->start_measurement = time;
> +	}
> +
> +	/* when queue has been idle for a while, turn off PIE and Reset
> counters */
> +	if (pie->active == 1 &&
> +		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
> +		pie->active =  0;
> +		pie->in_measurement = 0;
> +	}
> +
> +	/* Update PIE qlen parameter */
> +	pie->qlen++;
> +	pie->qlen_bytes += pkt_len;
> +
> +	/* No drop */
> +	return 0;
> +}
> +
> +/**
> + * @brief Decides if new packet should be enqueued or dropped
> + * Updates run time data and gives verdict whether to enqueue or drop 
> +the
> packet.
> + *
> + * @param pie_cfg [in] config pointer to a PIE configuration 
> + parameter
> structure
> + * @param pie [in,out] data pointer to PIE runtime data
> + * @param qlen [in] queue length
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp (measured in cpu cycles)
> + *
> + * @return Operation status
> + * @retval 0 enqueue the packet
> + * @retval 1 drop the packet based on drop probability criteria  */ 
> +static inline int __rte_experimental rte_pie_enqueue(const struct 
> +rte_pie_config *pie_cfg,
> +	struct rte_pie *pie,
> +	const unsigned int qlen,
> +	uint32_t pkt_len,
> +	const uint64_t time)
> +{
> +	RTE_ASSERT(pie_cfg != NULL);
> +	RTE_ASSERT(pie != NULL);
> +
> +	if (qlen != 0)
> +		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len,
> time);
> +	else
> +		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len); }
> +
> +/**
> + * @brief PIE rate estimation method
> + * Called on each packet departure.
> + *
> + * @param pie [in] data pointer to PIE runtime data
> + * @param pkt_len [in] packet length in bytes
> + * @param time [in] current time stamp in cpu cycles  */ static 
> +inline void __rte_experimental rte_pie_dequeue(struct rte_pie *pie,
> +	uint32_t pkt_len,
> +	uint64_t time)
> +{
> +	/* Dequeue rate estimation */
> +	if (pie->in_measurement) {
> +		pie->departed_bytes_count += pkt_len;
> +
> +		/* Start a new measurement cycle when enough packets */
> +		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
> +			uint64_t dq_time = time - pie->start_measurement;
> +
> +			if (pie->avg_dq_time == 0)
> +				pie->avg_dq_time = dq_time;
> +			else
> +				pie->avg_dq_time = dq_time *
> RTE_DQ_WEIGHT + pie->avg_dq_time
> +					* (1 - RTE_DQ_WEIGHT);
> +
> +			pie->in_measurement = 0;
> +		}
> +	}
> +
> +	/* Start measurement cycle when enough data in the queue */
> +	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie-
> >in_measurement == 0)) {
> +		pie->in_measurement = 1;
> +		pie->start_measurement = time;
> +		pie->departed_bytes_count = 0;
> +	}
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* __RTE_PIE_H_INCLUDED__ */
> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c index
> a858f61f95..320435ed91 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c
> @@ -89,8 +89,12 @@ struct rte_sched_queue {
> 
>  struct rte_sched_queue_extra {
>  	struct rte_sched_queue_stats stats;
> -#ifdef RTE_SCHED_RED
> -	struct rte_red red;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red red;
> +		struct rte_pie pie;
> +	};
>  #endif
>  };
> 
> @@ -183,8 +187,13 @@ struct rte_sched_subport {
>  	/* Pipe queues size */
>  	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +	enum rte_sched_aqm_mode aqm;
> +#ifdef RTE_SCHED_AQM
> +	RTE_STD_C11
> +	union {
> +		struct rte_red_config
> wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		struct rte_pie_config
> pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
> 
>  	/* Scheduling loop detection */
> @@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port 
> *port, uint32_t n_subports)
>  	rte_free(port);
>  }
> 
> +#ifdef RTE_SCHED_AQM
> +
> +static int
> +rte_sched_red_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +
> +		uint32_t j;
> +
> +		for (j = 0; j < RTE_COLORS; j++) {
> +			/* if min/max are both zero, then RED is disabled */
> +			if ((params->wred_params[i][j].min_th |
> +				 params->wred_params[i][j].max_th) == 0) {
> +				continue;
> +			}
> +
> +			if (rte_red_config_init(&s->wred_config[i][j],
> +				params->wred_params[i][j].wq_log2,
> +				params->wred_params[i][j].min_th,
> +				params->wred_params[i][j].max_th,
> +				params->wred_params[i][j].maxp_inv) != 0) {
> +				rte_sched_free_memory(port, n_subports);
> +
> +				RTE_LOG(NOTICE, SCHED,
> +				"%s: RED configuration init fails\n",
> __func__);
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	s->aqm = RTE_SCHED_AQM_WRED;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_pie_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> +		if (params->pie_params[i].tailq_th > params->qsize[i]) {
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE tailq threshold incorrect\n", __func__);
> +			return -EINVAL;
> +		}
> +
> +		if (rte_pie_config_init(&s->pie_config[i],
> +			params->pie_params[i].qdelay_ref,
> +			params->pie_params[i].dp_update_interval,
> +			params->pie_params[i].max_burst,
> +			params->pie_params[i].tailq_th) != 0) {
> +			rte_sched_free_memory(port, n_subports);
> +
> +			RTE_LOG(NOTICE, SCHED,
> +			"%s: PIE configuration init fails\n", __func__);
> +			return -EINVAL;
> +			}
> +	}
> +	s->aqm = RTE_SCHED_AQM_PIE;
> +	return 0;
> +}
> +
> +static int
> +rte_sched_aqm_config(struct rte_sched_port *port,
> +	struct rte_sched_subport *s,
> +	struct rte_sched_subport_params *params,
> +	uint32_t n_subports)
> +{
> +	if (params->aqm == RTE_SCHED_AQM_WRED)
> +		return rte_sched_red_config(port, s, params, n_subports);
> +
> +	else if (params->aqm == RTE_SCHED_AQM_PIE)
> +		return rte_sched_pie_config(port, s, params, n_subports);
> +
> +	return -EINVAL;
> +}
> +#endif
> +
>  int
>  rte_sched_subport_config(struct rte_sched_port *port,
>  	uint32_t subport_id,
> @@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port 
> *port,
>  		s->n_pipe_profiles = params->n_pipe_profiles;
>  		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
> -			uint32_t j;
> -
> -			for (j = 0; j < RTE_COLORS; j++) {
> -			/* if min/max are both zero, then RED is disabled */
> -				if ((params->red_params[i][j].min_th |
> -				     params->red_params[i][j].max_th) == 0) {
> -					continue;
> -				}
> -
> -				if (rte_red_config_init(&s->red_config[i][j],
> -				    params->red_params[i][j].wq_log2,
> -				    params->red_params[i][j].min_th,
> -				    params->red_params[i][j].max_th,
> -				    params->red_params[i][j].maxp_inv) != 0)
> {
> -					RTE_LOG(NOTICE, SCHED,
> -					"%s: RED configuration init fails\n",
> -					__func__);
> -					ret = -EINVAL;
> -					goto out;
> -				}
> -			}
> +#ifdef RTE_SCHED_AQM
> +		status = rte_sched_aqm_config(port, s, params,
> n_subports);
> +		if (status) {
> +			RTE_LOG(NOTICE, SCHED, "%s: AQM configuration
> fails\n", __func__);
> +			return status;
>  		}
>  #endif
> 
> @@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct
> rte_sched_port *port,
>  	subport->stats.n_bytes_tc[tc_index] += pkt_len;  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> *port,
> -	struct rte_sched_subport *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port 
> *port,
>  	struct rte_sched_subport *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
>  	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len; -#ifdef 
> RTE_SCHED_RED
> -	subport->stats.n_pkts_red_dropped[tc_index] += red;
> +#ifdef RTE_SCHED_AQM
> +	subport->stats.n_pkts_aqm_dropped[tc_index] += drops;
>  #endif
>  }
> 
> @@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct
> rte_sched_subport *subport,
>  	qe->stats.n_bytes += pkt_len;
>  }
> 
> -#ifdef RTE_SCHED_RED
> -static inline void
> -rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> *subport,
> -	uint32_t qindex,
> -	struct rte_mbuf *pkt,
> -	uint32_t red)
> -#else
>  static inline void
>  rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport 
> *subport,
>  	uint32_t qindex,
>  	struct rte_mbuf *pkt,
> -	__rte_unused uint32_t red)
> -#endif
> +	__rte_unused uint32_t drops)
>  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
>  	uint32_t pkt_len = pkt->pkt_len;
> 
>  	qe->stats.n_pkts_dropped += 1;
>  	qe->stats.n_bytes_dropped += pkt_len; -#ifdef RTE_SCHED_RED
> -	qe->stats.n_pkts_red_dropped += red;
> +#ifdef RTE_SCHED_AQM
> +	qe->stats.n_pkts_aqm_dropped += drops;
>  #endif
>  }
> 
>  #endif /* RTE_SCHED_COLLECT_STATS */
> 
> -#ifdef RTE_SCHED_RED
> +#ifdef RTE_SCHED_AQM
> 
>  static inline int
> -rte_sched_port_red_drop(struct rte_sched_port *port,
> +rte_sched_port_aqm_drop(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport,
>  	struct rte_mbuf *pkt,
>  	uint32_t qindex,
>  	uint16_t qlen)
>  {
>  	struct rte_sched_queue_extra *qe;
> -	struct rte_red_config *red_cfg;
> -	struct rte_red *red;
>  	uint32_t tc_index;
> -	enum rte_color color;
> 
>  	tc_index = rte_sched_port_pipe_tc(port, qindex);
> -	color = rte_sched_port_pkt_read_color(pkt);
> -	red_cfg = &subport->red_config[tc_index][color];
> +	qe = subport->queue_extra + qindex;
> 
> -	if ((red_cfg->min_th | red_cfg->max_th) == 0)
> -		return 0;
> +	/* WRED */
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red_config *red_cfg;
> +		struct rte_red *red;
> +		enum rte_color color;
> 
> -	qe = subport->queue_extra + qindex;
> -	red = &qe->red;
> +		color = rte_sched_port_pkt_read_color(pkt);
> +		red_cfg = &subport->wred_config[tc_index][color];
> +
> +		if ((red_cfg->min_th | red_cfg->max_th) == 0)
> +			return 0;
> 
> -	return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +		red = &qe->red;
> +
> +		return rte_red_enqueue(red_cfg, red, qlen, port->time);
> +	}
> +
> +	/* PIE */
> +	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
> +	struct rte_pie *pie = &qe->pie;
> +
> +	return rte_pie_enqueue(pie_cfg, pie, pkt->pkt_len, qlen, port-
> >time_cpu_cycles);
>  }
> 
>  static inline void
> @@ -1815,14 +1885,29 @@
> rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
>  	struct rte_sched_subport *subport, uint32_t qindex)  {
>  	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
> -	struct rte_red *red = &qe->red;
> +	if (subport->aqm == RTE_SCHED_AQM_WRED) {
> +		struct rte_red *red = &qe->red;
> +
> +		rte_red_mark_queue_empty(red, port->time);
> +	}
> +}
> +
> +static inline void
> +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport, 
> +uint32_t qindex, uint32_t pkt_len, uint64_t time) {
> +	struct rte_sched_queue_extra *qe = subport->queue_extra +
> qindex;
> +	struct rte_pie *pie = &qe->pie;
> 
> -	rte_red_mark_queue_empty(red, port->time);
> +	/* Update queue length */
> +	pie->qlen -= 1;
> +	pie->qlen_bytes -= pkt_len;
> +
> +	rte_pie_dequeue(pie, pkt_len, time);
>  }
> 
>  #else
> 
> -static inline int rte_sched_port_red_drop(struct rte_sched_port *port 
> __rte_unused,
> +static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port
> __rte_unused,
>  	struct rte_sched_subport *subport __rte_unused,
>  	struct rte_mbuf *pkt __rte_unused,
>  	uint32_t qindex __rte_unused,
> @@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct 
> rte_sched_port *port __rte_unus
> 
>  #define rte_sched_port_set_queue_empty_timestamp(port, subport,
> qindex)
> 
> -#endif /* RTE_SCHED_RED */
> +#endif /* RTE_SCHED_AQM */
> 
>  #ifdef RTE_SCHED_DEBUG
> 
> @@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port 
> *port,
>  	qlen = q->qw - q->qr;
> 
>  	/* Drop the packet (and update drop stats) when queue is full */
> -	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex,
> qlen) ||
> +	if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex,
> qlen) ||
>  		     (qlen >= qsize))) {
>  		rte_pktmbuf_free(pkt);
>  #ifdef RTE_SCHED_COLLECT_STATS
> @@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port,  {
>  	struct rte_sched_grinder *grinder = subport->grinder + pos;
>  	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
> +	uint32_t qindex = grinder->qindex[grinder->qpos];
>  	struct rte_mbuf *pkt = grinder->pkt;
>  	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
>  	uint32_t be_tc_active;
> @@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port,
>  		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
> 
>  	if (queue->qr == queue->qw) {
> -		uint32_t qindex = grinder->qindex[grinder->qpos];
> -
>  		rte_bitmap_clear(subport->bmp, qindex);
>  		grinder->qmask &= ~(1 << grinder->qpos);
>  		if (be_tc_active)
>  			grinder->wrr_mask[grinder->qpos] = 0;
> +
>  		rte_sched_port_set_queue_empty_timestamp(port,
> subport, qindex);
>  	}
> 
> +#ifdef RTE_SCHED_AQM
> +	if (subport->aqm == RTE_SCHED_AQM_PIE)
> +		rte_sched_port_pie_dequeue(subport, qindex, pkt_len,
> port->time_cpu_cycles);
> +#endif
> +
>  	/* Reset pipe loop detection */
>  	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
>  	grinder->productive = 1;
> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h index 
> c1a772b70c..a5fe6266cd 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -61,9 +61,10 @@ extern "C" {
>  #include <rte_mbuf.h>
>  #include <rte_meter.h>
> 
> -/** Random Early Detection (RED) */
> -#ifdef RTE_SCHED_RED
> +/** Active Queue Management */
> +#ifdef RTE_SCHED_AQM
>  #include "rte_red.h"
> +#include "rte_pie.h"
>  #endif
> 
>  /** Maximum number of queues per pipe.
> @@ -110,6 +111,28 @@ extern "C" {
>  #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
>  #endif
> 
> +/**
> + * Active Queue Management (AQM) mode
> + *
> + * This is used for controlling the admission of packets into a 
> +packet queue
> or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The 
> + *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each 
> + packet
> color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE) 
> + randomly
> + * drops a packet at the onset of the congestion and tries to control 
> + the
> + * latency around the target value. The congestion detection, 
> + however, is
> based
> + * on the queueing latency instead of the queue length like RED. For 
> +more
> + * information, refer RFC8033.
> + */
> +enum rte_sched_aqm_mode {
> +	RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection
> (WRED) */
> +	RTE_SCHED_AQM_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +
>  /*
>   * Pipe configuration parameters. The period and credits_per_period
>   * parameters are measured in bytes, with one byte meaning the time 
> @@ -174,9 +197,17 @@ struct rte_sched_subport_params {
>  	/** Max allowed profiles in the pipe profile table */
>  	uint32_t n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -	/** RED parameters */
> -	struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_AQM
> +	/** Active Queue Management mode */
> +	enum rte_sched_aqm_mode aqm;
> +
> +	RTE_STD_C11
> +	union {
> +		/** WRED parameters */
> +		struct rte_red_params
> wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		/** PIE parameters */
> +		struct rte_pie_params
> pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif
>  };
> 
> @@ -208,9 +239,9 @@ struct rte_sched_subport_stats {
>  	/** Number of bytes dropped for each traffic class */
>  	uint64_t
> n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	/** Number of packets dropped by red */
> -	uint64_t
> n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +#ifdef RTE_SCHED_AQM
> +	/** Number of packets dropped by active queue management
> scheme */
> +	uint64_t
> n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
>  #endif
>  };
> 
> @@ -222,9 +253,9 @@ struct rte_sched_queue_stats {
>  	/** Packets dropped */
>  	uint64_t n_pkts_dropped;
> 
> -#ifdef RTE_SCHED_RED
> -	/** Packets dropped by RED */
> -	uint64_t n_pkts_red_dropped;
> +#ifdef RTE_SCHED_AQM
> +	/** Packets dropped by active queue management scheme */
> +	uint64_t n_pkts_aqm_dropped;
>  #endif
> 
>  	/** Bytes successfully written */
> diff --git a/lib/sched/version.map b/lib/sched/version.map index
> ace284b7de..3422821ac8 100644
> --- a/lib/sched/version.map
> +++ b/lib/sched/version.map
> @@ -30,4 +30,7 @@ EXPERIMENTAL {
>  	rte_sched_subport_pipe_profile_add;
>  	# added in 20.11
>  	rte_sched_port_subport_profile_add;
> +
> +	rte_pie_rt_data_init;
> +	rte_pie_config_init;
>  };
> --
> 2.25.1

NACK

I see that none of my previous comments from the V4 review got implemented, is there any reason to silently discard all of them?

https://patches.dpdk.org/project/dpdk/patch/20210705080421.18736-2-wojciechx.liguzinski@intel.com/

I did not see any reply from you on my comments, so I assumed that you accepted and implemented most of them, but I see that none of them were picked up.

Also, I don't see any revision history, just the version counter gets incremented, so reviewing a new version of your patch requires re-reading every line of code, which is time consuming. Could you please add a revision history?

Thanks,
Cristian

------------------------------------

First thing - I'm very sorry that I haven't replied to your comments. It was not my intention to ignore them.
I was going through them and I was a bit confused that most of your suggestions would actually revert the majority of my changes that had been suggested by other community members, e.g. by Stephen Hemminger.
I wanted to get some opinion how to proceed but I got some additional tasks on the way, so - my fault that I was postponing that. Again - apologies.

I will go through them again and implement/respond to them as quickly as possible.

I was uploading the patches following the procedure that was presented to me (rebasing the changes so the new ones are applied to the existing commits), so honestly I was not aware that I needed to provide a revision history when submitting another version of the patches.

Let me think of a way to provide you with such revision history.

Thanks,
Wojtek

----------------------


Thanks for your review comments.

Replying to them:
- The majority of them I have implemented/added to the code,
- Adding the one extra else statement for the "declaring new variables in the middle of the function" comment had to be rejected, as it was in conflict with checkpatch and resulted in a warning,
- Another thing that I needed to omit is using the generic struct rte_sched_cman_params in struct rte_sched_subport (rte_sched.c), as the config structures for red and pie are used there, and with the limited time I didn't want to risk breaking the implementation,
- Introducing rte_sched_cman_params structure changed a bit the code, but I hope that I didn't break anything
- Some changes (like in cfg_file.c &amp; cfg_file.h) were the result of the previous point, plus of lines exceeding the maximum line length

So now, I'd like to ask you for a review.
Unfortunately, I haven't found a neat way to show you the revision history, but please have a look here: https://patchwork.dpdk.org/project/dpdk/list/?submitter=2195&state=%2A&archive=both


Thanks,
Wojtek

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library
  2021-10-14 15:33                         ` [dpdk-dev] [PATCH v13 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                             ` (4 preceding siblings ...)
  2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-15  8:16                           ` Liguzinski, WojciechX
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                               ` (6 more replies)
  5 siblings, 7 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-15  8:16 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency 
variation. Currently, it supports RED for active queue management (which is designed 
to control the queue length but it does not control latency directly and is now being 
obsoleted). However, more advanced queue management is required to address this problem
and provide desirable quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address 
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and 
adding a new set of data structures to the library, adding PIE related APIs. 
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |  111 +-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  240 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    3 +
 20 files changed, 2161 insertions(+), 276 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v14 1/5] sched: add PIE based congestion management
  2021-10-15  8:16                           ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-10-15  8:16                             ` Liguzinski, WojciechX
  2021-10-15 13:51                               ` Dumitrescu, Cristian
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                               ` (5 subsequent siblings)
  6 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-15  8:16 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 240 +++++++++-----
 lib/sched/rte_sched.h                    |  63 +++-
 lib/sched/version.map                    |   3 +
 7 files changed, 701 insertions(+), 96 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probility criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..a066eed186 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,14 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1088,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_WRED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->cman_params->cman_mode == RTE_SCHED_CMAN_WRED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1262,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,30 +1795,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1822,93 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* WRED */
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_WRED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1917,15 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2021,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2494,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2514,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..07fcf439d8 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,12 +197,30 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion Management parameters */
+	struct rte_sched_cman_params *cman_params;
 #endif
 };
 
+#ifdef RTE_SCHED_CMAN
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** WRED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+#endif
+
 struct rte_sched_subport_profile_params {
 	/** Token bucket rate (measured in bytes per second) */
 	uint64_t tb_rate;
@@ -208,10 +249,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +261,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index 53c337b143..54e5e96d4f 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,7 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v14 2/5] example/qos_sched: add PIE support
  2021-10-15  8:16                           ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-15  8:16                             ` Liguzinski, WojciechX
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                               ` (4 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-15  8:16 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling either PIE or RED congestion
management by parsing the config file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   | 111 ++++++++++++++----
 examples/qos_sched/cfg_file.h   |   5 +
 examples/qos_sched/init.c       |  27 +++--
 examples/qos_sched/main.h       |   3 +
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 7 files changed, 242 insertions(+), 102 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..ea8b078566 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_WRED) {
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_WRED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_WRED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
 			/* Parse WRED min thresholds */
 			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -275,7 +310,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -290,7 +325,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -306,7 +341,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..94bad349e8 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -204,15 +204,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_WRED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +273,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v14 3/5] example/ip_pipeline: add PIE support
  2021-10-15  8:16                           ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-15  8:16                             ` Liguzinski, WojciechX
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                               ` (3 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-15  8:16 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Add PIE congestion management support to the IP Pipeline example application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v14 4/5] doc/guides/prog_guide: added PIE
  2021-10-15  8:16                           ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                               ` (2 preceding siblings ...)
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-15  8:16                             ` Liguzinski, WojciechX
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 5/5] app/test: add tests for PIE Liguzinski, WojciechX
                                               ` (2 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-15  8:16 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/2^9.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+These parameters are specified in the format expected by the dropper module API.
+They could be made self-calculated, for fine tuning, within the applications.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED) and Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and desired latency, and on whether the queuing latency is
+currently trending up or down. Queuing latency can be obtained by direct measurement
+or estimated from the queue length and dequeue rate. The random drop is triggered by
+a packet's arrival before it is enqueued into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v14 5/5] app/test: add tests for PIE
  2021-10-15  8:16                           ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                               ` (3 preceding siblings ...)
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-15  8:16                             ` Liguzinski, WojciechX
  2021-10-15 13:56                             ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Dumitrescu, Cristian
  2021-10-19  8:18                             ` [dpdk-dev] [PATCH v15 " Liguzinski, WojciechX
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-15  8:16 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/meson.build |    4 +
 app/test/test_pie.c  | 1065 ++++++++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c  |    6 +-
 lib/sched/rte_pie.h  |   17 +-
 4 files changed, 1085 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/meson.build b/app/test/meson.build
index f144d8b8ed..00ad7ab368 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -112,6 +112,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -242,6 +243,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -293,6 +295,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -306,6 +309,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_tests - num_pass);
+}
+
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	return (num_pass == num_tests) ? 0 : 1;
+}
+
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v14 1/5] sched: add PIE based congestion management
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-15 13:51                               ` Dumitrescu, Cristian
  2021-10-19  9:34                                 ` Liguzinski, WojciechX
  0 siblings, 1 reply; 178+ messages in thread
From: Dumitrescu, Cristian @ 2021-10-15 13:51 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Singh, Jasvinder; +Cc: Ajmera, Megha

Hi Wojciech,

Thank you for your patchset!

Can you please, at least starting from this version, add a short change log at the top of your file, just after the signoff line? It helps a lot during the review process, and you can find abundant examples in other patchsets from this email list. One line of description for every change would be nice, thank you.

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Friday, October 15, 2021 9:16 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v14 1/5] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 240 +++++++++-----
>  lib/sched/rte_sched.h                    |  63 +++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 701 insertions(+), 96 deletions(-)
>  create mode 100644 lib/sched/rte_pie.c
>  create mode 100644 lib/sched/rte_pie.h
> 

<snip>

> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
> index a858f61f95..a066eed186 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c

<snip>

> @@ -183,8 +187,14 @@ struct rte_sched_subport {
>  	/* Pipe queues size */
>  	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_CMAN
> +	enum rte_sched_cman_mode cman;
> +
> +	RTE_STD_C11
> +	union {
> +		struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		struct rte_pie_config
> pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif

Can you please use here the rte_sched_cman_params structure that you just created in rte_sched.h as opposed to inlining this structure. Yes, I agree it might have some ripple effect throughout this file, but I think it should be very limited, and also quick to do.

<snip>

> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> index c1a772b70c..07fcf439d8 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -61,9 +61,10 @@ extern "C" {
>  #include <rte_mbuf.h>
>  #include <rte_meter.h>
> 
> -/** Random Early Detection (RED) */
> -#ifdef RTE_SCHED_RED
> +/** Congestion Management */
> +#ifdef RTE_SCHED_CMAN
>  #include "rte_red.h"
> +#include "rte_pie.h"
>  #endif
> 
>  /** Maximum number of queues per pipe.
> @@ -110,6 +111,28 @@ extern "C" {
>  #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
>  #endif
> 
> +/**
> + * Congestion Management (CMAN) mode
> + *
> + * This is used for controlling the admission of packets into a packet queue
> or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each packet
> color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
> + * drops a packet at the onset of the congestion and tries to control the
> + * latency around the target value. The congestion detection, however, is
> based
> + * on the queueing latency instead of the queue length like RED. For more
> + * information, refer RFC8033.
> + */
> +enum rte_sched_cman_mode {
> +	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection
> (WRED) */

The algorithm is RED, not WRED. Let's stick to RTE_SCHED_CMAN_RED, please. The Weighted aspect comes into place when defining the struct rte_sched_cman_params::red_params as an array indexed by color below.

> +	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +
>  /*
>   * Pipe configuration parameters. The period and credits_per_period
>   * parameters are measured in bytes, with one byte meaning the time
> @@ -174,12 +197,30 @@ struct rte_sched_subport_params {
>  	/** Max allowed profiles in the pipe profile table */
>  	uint32_t n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -	/** RED parameters */
> -	struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_CMAN
> +	/** Congestion Management parameters */
> +	struct rte_sched_cman_params *cman_params;

You are instantiating struct rte_sched_cman_params here, but you are defining it just below, i.e. after you attempted to instantiate it. Aren't you getting a build error when compiling with RTE_SCHED_CMAN defined?

>  #endif
>  };
> 
> +#ifdef RTE_SCHED_CMAN
> +/*
> + * Congestion Management configuration parameters.
> + */
> +struct rte_sched_cman_params {
> +	/** Congestion Management mode */
> +	enum rte_sched_cman_mode cman_mode;
> +
> +	union {
> +		/** WRED parameters */

In the comment: RED parameters instead of WRED, please.

> +		struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +
> +		/** PIE parameters */
> +		struct rte_pie_params
> pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
> +};
> +#endif
> +

<snip>

> diff --git a/lib/sched/version.map b/lib/sched/version.map
> index 53c337b143..54e5e96d4f 100644
> --- a/lib/sched/version.map
> +++ b/lib/sched/version.map
> @@ -30,4 +30,7 @@ EXPERIMENTAL {
>  	rte_sched_subport_pipe_profile_add;
>  	# added in 20.11
>  	rte_sched_port_subport_profile_add;
> +
> +	rte_pie_rt_data_init;
> +	rte_pie_config_init;

You need to put a comment about the release when these symbols got introduced, similar to above.

>  };
> --
> 2.25.1

Thank you for your work!

Regards,
Cristian

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library
  2021-10-15  8:16                           ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                               ` (4 preceding siblings ...)
  2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-15 13:56                             ` Dumitrescu, Cristian
  2021-10-19  8:26                               ` Liguzinski, WojciechX
  2021-10-19  8:18                             ` [dpdk-dev] [PATCH v15 " Liguzinski, WojciechX
  6 siblings, 1 reply; 178+ messages in thread
From: Dumitrescu, Cristian @ 2021-10-15 13:56 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Singh, Jasvinder; +Cc: Ajmera, Megha



> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Friday, October 15, 2021 9:16 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v14 0/5] Add PIE support for HQoS library
> 
> DPDK sched library is equipped with mechanism that secures it from the
> bufferbloat problem
> which is a situation when excess buffers in the network cause high latency
> and latency
> variation. Currently, it supports RED for active queue management (which is
> designed
> to control the queue length but it does not control latency directly and is now
> being
> obsoleted). 

Please remove the statement that RED is obsolete, as it is not true. Please refer only to the benefits on the new algorithm without any generic negative statements not supported by data versus other algorithms, thank you!

However, more advanced queue management is required to
> address this problem
> and provide desirable quality of service to users.
> 
> This solution (RFC) proposes usage of new algorithm called "PIE"
> (Proportional Integral
> controller Enhanced) that can effectively and directly control queuing latency
> to address
> the bufferbloat problem.
> 
> The implementation of mentioned functionality includes modification of
> existing and
> adding a new set of data structures to the library, adding PIE related APIs.
> This affects structures in public API/ABI. That is why deprecation notice is
> going
> to be prepared and sent.
> 
> Liguzinski, WojciechX (5):
>   sched: add PIE based congestion management
>   example/qos_sched: add PIE support
>   example/ip_pipeline: add PIE support
>   doc/guides/prog_guide: added PIE
>   app/test: add tests for PIE
> 
>  app/test/meson.build                         |    4 +
>  app/test/test_pie.c                          | 1065 ++++++++++++++++++
>  config/rte_config.h                          |    1 -
>  doc/guides/prog_guide/glossary.rst           |    3 +
>  doc/guides/prog_guide/qos_framework.rst      |   60 +-
>  doc/guides/prog_guide/traffic_management.rst |   13 +-
>  drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
>  examples/ip_pipeline/tmgr.c                  |  142 +--
>  examples/qos_sched/app_thread.c              |    1 -
>  examples/qos_sched/cfg_file.c                |  111 +-
>  examples/qos_sched/cfg_file.h                |    5 +
>  examples/qos_sched/init.c                    |   27 +-
>  examples/qos_sched/main.h                    |    3 +
>  examples/qos_sched/profile.cfg               |  196 ++--
>  lib/sched/meson.build                        |   10 +-
>  lib/sched/rte_pie.c                          |   86 ++
>  lib/sched/rte_pie.h                          |  398 +++++++
>  lib/sched/rte_sched.c                        |  240 ++--
>  lib/sched/rte_sched.h                        |   63 +-
>  lib/sched/version.map                        |    3 +
>  20 files changed, 2161 insertions(+), 276 deletions(-)
>  create mode 100644 app/test/test_pie.c
>  create mode 100644 lib/sched/rte_pie.c
>  create mode 100644 lib/sched/rte_pie.h
> 
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v15 0/5] Add PIE support for HQoS library
  2021-10-15  8:16                           ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                               ` (5 preceding siblings ...)
  2021-10-15 13:56                             ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Dumitrescu, Cristian
@ 2021-10-19  8:18                             ` Liguzinski, WojciechX
  2021-10-19  8:18                               ` [dpdk-dev] [PATCH v15 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                                 ` (6 more replies)
  6 siblings, 7 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19  8:18 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management. However, more
advanced queue management is required to address this problem and provide desirable
quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and
adding a new set of data structures to the library, adding PIE related APIs.
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   60 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  241 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    4 +
 20 files changed, 2171 insertions(+), 284 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v15 1/5] sched: add PIE based congestion management
  2021-10-19  8:18                             ` [dpdk-dev] [PATCH v15 " Liguzinski, WojciechX
@ 2021-10-19  8:18                               ` Liguzinski, WojciechX
  2021-10-19  8:18                               ` [dpdk-dev] [PATCH v15 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                                 ` (5 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19  8:18 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>

- Renamed WRED occurences to RED
- Applied __attribute__((unused)) to parameters in rte_sched_port_pie_dequeue()
- Moved rte_sched_cman_params structure before the first usage
- Added the release comment to version.map when new symbols are being introduced
- Removed the statement that RED is obsolete in cover letter
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 241 +++++++++-----
 lib/sched/rte_sched.h                    |  63 +++-
 lib/sched/version.map                    |   4 +
 7 files changed, 703 insertions(+), 96 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocating here is pointless: the new pointer is only
+		 * stored in the local parameter, so the caller can never
+		 * see or free it (guaranteed leak of unreachable memory).
+		 * Treat a NULL pointer as a caller error instead.
+		 */
+		RTE_LOG(ERR, SCHED, "%s: Invalid addr for pie\n", __func__);
+		return;
+	}
+
+	/* Reset all run-time state to "PIE inactive, nothing measured". */
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->qlen_bytes = 0; /* was left uninitialized before */
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -EINVAL;
+
+	/* All parameters are unsigned, so the only invalid value is zero
+	 * (the previous "<= 0" tests could never be true for negatives).
+	 */
+	if (qdelay_ref == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Convert the millisecond parameters to CPU cycles.
+	 * No overflow risk: tsc_hz * 65535 fits comfortably in 64 bits.
+	 */
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data
+ * (This structure belongs to PIE; the previous header said "RED".)
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance; NOTE(review): set from max_burst which is in cpu cycles, not bytes as previously labelled — confirm */
+	uint64_t qdelay_old;           /**< Old queue delay; NOTE(review): assigned from current_qdelay which is cycle-derived, not bytes as previously labelled — confirm */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 always: the packet is enqueued
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is an integer, so comparing it against NULL (as the
+	 * previous assert did) was meaningless; assert the pointers.
+	 */
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Periodically recompute the packet drop probability
+ *
+ * Called every dp_update_interval; only updates state, never drops.
+ * (The previous comment was copy-pasted from the drop-decision helper.)
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 * Cast to double BEFORE dividing: avg_dq_time may be smaller than
+	 * RTE_DQ_THRESHOLD, in which case the previous 64-bit integer
+	 * division truncated to 0 and current_qdelay was always 0.
+	 */
+	double current_qdelay = pie->qlen *
+		((double)pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	/* Scale p down when the current drop probability is small
+	 * (auto-tuning of the gain; see RFC 8033).
+	 */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Decrement the burst allowance, saturating at zero. The previous
+	 * unsigned subtraction wrapped around when the allowance was
+	 * smaller than the interval, making its "> 0" test always true.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Draw a uniform random number in [0, 1]. The previous code divided
+	 * in 64-bit integer arithmetic (rte_rand()/RTE_RAND_MAX), which
+	 * truncates to 0 for (almost) every draw, so the comparison below
+	 * dropped whenever drop_prob > 0.
+	 */
+	rand_value = (double)rte_rand() / (double)RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if a new packet should be enqueued or dropped for a
+ * non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Tail drop: queue already holds tailq_th packets or more */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Probabilistic drop, suppressed while burst allowance lasts */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE
+	 * and restart the dequeue-rate measurement cycle.
+	 */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* When the queue has been idle for a while, turn off PIE and reset
+	 * counters (same idle check as rte_pie_enqueue_empty()).
+	 */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Packet is accepted: account for it in the queue length */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if a new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on the tail-drop (max threshold) criterion
+ * @retval 2 drop the packet based on the drop probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* Empty-queue path never drops; non-empty path applies tail-drop
+	 * and PIE probability checks.
+	 */
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure to maintain the average dequeue time.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Close the cycle once RTE_DQ_THRESHOLD bytes have departed */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			/* Exponentially weighted moving average of the time
+			 * needed to dequeue RTE_DQ_THRESHOLD bytes.
+			 */
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..a659dca379 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,14 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1088,90 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+/* Configure the RED congestion management scheme for one subport:
+ * one RED config per (traffic class, color) pair. An entry whose
+ * min_th and max_th are both zero leaves RED disabled for that pair.
+ * On init failure the subport memory is freed and -EINVAL returned.
+ */
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_RED;
+	return 0;
+}
+
+/* Configure the PIE congestion management scheme for one subport:
+ * one PIE config per traffic class. On any failure the subport memory
+ * is freed (consistent with the RED path) and -EINVAL is returned.
+ */
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			/* Free here too: the caller returns straight away on
+			 * error, so skipping this (as the previous code did)
+			 * leaked the subport memory on this path only.
+			 */
+			rte_sched_free_memory(port, n_subports);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+/* Dispatch subport congestion-management setup to the configured
+ * scheme (RED or PIE); any other mode is rejected with -EINVAL.
+ */
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	switch (params->cman_params->cman_mode) {
+	case RTE_SCHED_CMAN_RED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_CMAN_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,30 +1794,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1821,93 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1916,17 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __attribute__((unused)),
+	uint32_t qindex __attribute__((unused)),
+	uint32_t pkt_len __attribute__((unused)),
+	uint64_t time __attribute__((unused))) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2022,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2495,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2515,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..72728e1751 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -139,6 +162,24 @@ struct rte_sched_pipe_params {
 	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 };
 
+#ifdef RTE_SCHED_CMAN
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+#endif
+
 /*
  * Subport configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +215,9 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion Management parameters */
+	struct rte_sched_cman_params *cman_params;
 #endif
 };
 
@@ -208,10 +249,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +261,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index 53c337b143..ffa5f7b10d 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,8 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v15 2/5] example/qos_sched: add PIE support
  2021-10-19  8:18                             ` [dpdk-dev] [PATCH v15 " Liguzinski, WojciechX
  2021-10-19  8:18                               ` [dpdk-dev] [PATCH v15 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-19  8:18                               ` Liguzinski, WojciechX
  2021-10-19  8:19                               ` [dpdk-dev] [PATCH v15 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                                 ` (4 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19  8:18 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling either PIE or RED congestion
management, selected by parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>

- Renamed WRED occurrences to RED
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   | 127 +++++++++++++++------
 examples/qos_sched/cfg_file.h   |   5 +
 examples/qos_sched/init.c       |  27 +++--
 examples/qos_sched/main.h       |   3 +
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 7 files changed, 250 insertions(+), 110 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..450482f07d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_RED) {
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_RED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_RED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
-			/* Parse WRED min thresholds */
-			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			/* Parse RED min thresholds */
+			snprintf(str, sizeof(str), "tc %d red min", i);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -268,14 +303,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED max thresholds */
-			snprintf(str, sizeof(str), "tc %d wred max", i);
+			/* Parse RED max thresholds */
+			snprintf(str, sizeof(str), "tc %d red max", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -283,14 +318,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED inverse mark probabilities */
-			snprintf(str, sizeof(str), "tc %d wred inv prob", i);
+			/* Parse RED inverse mark probabilities */
+			snprintf(str, sizeof(str), "tc %d red inv prob", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -299,14 +334,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED EWMA filter weights */
-			snprintf(str, sizeof(str), "tc %d wred weight", i);
+			/* Parse RED EWMA filter weights */
+			snprintf(str, sizeof(str), "tc %d red weight", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..d151da5c2d 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -204,15 +204,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_RED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +273,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v15 3/5] example/ip_pipeline: add PIE support
  2021-10-19  8:18                             ` [dpdk-dev] [PATCH v15 " Liguzinski, WojciechX
  2021-10-19  8:18                               ` [dpdk-dev] [PATCH v15 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-19  8:18                               ` [dpdk-dev] [PATCH v15 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-19  8:19                               ` Liguzinski, WojciechX
  2021-10-19  8:19                               ` [dpdk-dev] [PATCH v15 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                                 ` (3 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19  8:19 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v15 4/5] doc/guides/prog_guide: added PIE
  2021-10-19  8:18                             ` [dpdk-dev] [PATCH v15 " Liguzinski, WojciechX
                                                 ` (2 preceding siblings ...)
  2021-10-19  8:19                               ` [dpdk-dev] [PATCH v15 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-19  8:19                               ` Liguzinski, WojciechX
  2021-10-19  8:19                               ` [dpdk-dev] [PATCH v15 5/5] app/test: add tests for PIE Liguzinski, WojciechX
                                                 ` (2 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19  8:19 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 60 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 ++++-
 3 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c8450181d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
+Droppers
 -------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample but also analyzes whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/2^9.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+The format of these parameters is as specified by the dropper module API.
+They could be made self-calculated for fine tuning within the applications.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and the desired latency, and on whether the queuing latency is
+currently trending up or down. Queuing latency can be obtained by direct measurement
+or estimated from the queue length and the dequeue rate. The random drop is
+triggered on a packet's arrival, before it is enqueued into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v15 5/5] app/test: add tests for PIE
  2021-10-19  8:18                             ` [dpdk-dev] [PATCH v15 " Liguzinski, WojciechX
                                                 ` (3 preceding siblings ...)
  2021-10-19  8:19                               ` [dpdk-dev] [PATCH v15 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-19  8:19                               ` Liguzinski, WojciechX
  2021-10-19 12:18                               ` [dpdk-dev] [PATCH v15 0/5] Add PIE support for HQoS library Dumitrescu, Cristian
  2021-10-19 12:45                               ` [dpdk-dev] [PATCH v16 " Liguzinski, WojciechX
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19  8:19 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/meson.build |    4 +
 app/test/test_pie.c  | 1065 ++++++++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c  |    6 +-
 lib/sched/rte_pie.h  |   17 +-
 4 files changed, 1085 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/meson.build b/app/test/meson.build
index a16374b7a1..a189b4ebd3 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -114,6 +114,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -245,6 +246,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -296,6 +298,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -309,6 +312,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+/* Byte rate of a 10 Gbps port, used to convert CPU cycles to a
+ * "port time" expressed in byte slots.
+ */
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+/** Pre-compute the cycles-to-bytes conversion factor for get_port_ts(). */
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+/** Current timestamp scaled to port byte-slots (requires init_port_ts()). */
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+/** Reset a profiling record; min starts at UINT64_MAX so any sample lowers it. */
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+/** Record the start-of-measurement TSC value. */
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+/** Accumulate one sample into min/max/avg statistics. */
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	/* NOTE(review): despite its name, this local holds the elapsed delta,
+	 * and it is taken with rte_rdtsc() while the start used
+	 * rte_rdtsc_precise() — presumably intentional (cheaper read at the
+	 * end); confirm.
+	 */
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+/** Print n/min/max/avg for the record; silent when no samples were taken. */
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+/** Read the PIE activation flag from the runtime data. */
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	/* config is unused; the flag lives in the runtime data */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+/** Set the PIE activation flag in the runtime data. */
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+	/* config is unused; the flag lives in the runtime data */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	/* Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+/** Read the average dequeue time from the runtime data. */
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	/* Average dequeue time (the original comment wrongly said
+	 * "drop probability") */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+/** Ratio of dropped packets to all arrivals (enqueued + dropped). */
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ * Check whether the measured drop rate matches the drop probability
+ * within a percentage tolerance.
+ *
+ * On return *diff holds the percentage deviation of drop_rate from
+ * drop_prob. Returns 1 when the deviation is within tolerance, 0 otherwise.
+ *
+ * Fix: the original tested "(int)abs_diff == 0", which truncates toward
+ * zero and is therefore true for any deviation below 1.0 — i.e. for every
+ * possible pair of probabilities in [0, 1] — so the tolerance check was
+ * never exercised.
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if (abs_diff == 0.0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ *
+ * Initialises the timestamp conversion from the timer frequency, then
+ * builds one rte_pie_config per entry in tconfig (qdelay_ref and tailq_th
+ * are shared across configs; dp_update_interval and max_burst vary per
+ * index). Resets the shared qlen/dropped/enqueued counters on success.
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * Ramp up the queue: attempt up to 'attempts' enqueues of 'pkt_len' bytes
+ * and stop as soon as one is accepted.
+ *
+ * Returns 0 on the first accepted enqueue, -1 if every attempt was dropped.
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t attempt;
+
+	for (attempt = 0; attempt < attempts; attempt++) {
+		/* a zero return means the packet was accepted */
+		if (rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len,
+				get_port_ts()) == 0)
+			return 0;
+	}
+
+	/* every attempt was dropped */
+	return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ *
+ * Performs num_ops enqueue attempts of sizeof(uint32_t) bytes each and
+ * tallies accepted packets into *enqueued and drops into *dropped.
+ *
+ * NOTE(review): despite the name, no dequeue is performed and *qlen is
+ * never updated by this helper — presumably the queue level is meant to
+ * stay at whatever increase_qsize() established; confirm.
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+/**
+ * Functional test 1: ramp the queue to the first target level, then run
+ * batches of enqueues and verify that both the computed drop probability
+ * and the observed drop rate stay at zero.
+ *
+ * Fix: only one PIE configuration exists (ft_tconfig.num_cfg == 1), so
+ * pconfig[0] must be used on every iteration; the original indexed
+ * pconfig[i] with i up to RTE_DIM(ft_tlevels) - 1 == 23, reading out of
+ * bounds for i >= 1.
+ */
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/* reset rte_pie run-time data */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/* single config: always index 0 (see function comment) */
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	/* fix: the message said "functional test 2" for test 3 */
+	.msg = "functional test 3 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+/**
+ * Functional test 3: same flow as test 1 but starting from a non-zero
+ * queue length (ft3_qlen).
+ *
+ * Fixes: the loop iterates over ft3_tlevels (3 entries), not ft_tlevels
+ * (24 entries) as the original did, and pconfig[0] is used on every
+ * iteration since only one PIE configuration exists
+ * (ft3_tconfig.num_cfg == 1); pconfig[i] read out of bounds for i >= 1.
+ */
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/* reset rte_pie run-time data */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft3_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/* single config: always index 0 (see function comment) */
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ *
+ * Fix: all failure messages now use the "%i: %s\n" format — the original
+ * used "%i%s\n" for five of the six, fusing the line number into the text.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL runtime data */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_tests - num_pass);
+}
+
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	return (num_pass == num_tests) ? 0 : 1;
+}
+
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library
  2021-10-15 13:56                             ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Dumitrescu, Cristian
@ 2021-10-19  8:26                               ` Liguzinski, WojciechX
  0 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19  8:26 UTC (permalink / raw)
  To: Dumitrescu, Cristian, dev, Singh, Jasvinder; +Cc: Ajmera, Megha

Hi Cristian,

Done.

BR,
Wojtek

-----Original Message-----
From: Dumitrescu, Cristian <cristian.dumitrescu@intel.com> 
Sent: Friday, October 15, 2021 3:57 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>
Cc: Ajmera, Megha <megha.ajmera@intel.com>
Subject: RE: [PATCH v14 0/5] Add PIE support for HQoS library



> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Friday, October 15, 2021 9:16 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; 
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v14 0/5] Add PIE support for HQoS library
> 
> DPDK sched library is equipped with mechanism that secures it from the 
> bufferbloat problem which is a situation when excess buffers in the 
> network cause high latency and latency variation. Currently, it 
> supports RED for active queue management (which is designed to control 
> the queue length but it does not control latency directly and is now 
> being obsoleted).

Please remove the statement that RED is obsolete, as it is not true. Please refer only to the benefits of the new algorithm without any generic negative statements not supported by data versus other algorithms, thank you!

However, more advanced queue management is required to
> address this problem
> and provide desirable quality of service to users.
> 
> This solution (RFC) proposes usage of new algorithm called "PIE"
> (Proportional Integral
> controller Enhanced) that can effectively and directly control queuing 
> latency to address the bufferbloat problem.
> 
> The implementation of mentioned functionality includes modification of 
> existing and adding a new set of data structures to the library, 
> adding PIE related APIs.
> This affects structures in public API/ABI. That is why deprecation 
> notice is going to be prepared and sent.
> 
> Liguzinski, WojciechX (5):
>   sched: add PIE based congestion management
>   example/qos_sched: add PIE support
>   example/ip_pipeline: add PIE support
>   doc/guides/prog_guide: added PIE
>   app/test: add tests for PIE
> 
>  app/test/meson.build                         |    4 +
>  app/test/test_pie.c                          | 1065 ++++++++++++++++++
>  config/rte_config.h                          |    1 -
>  doc/guides/prog_guide/glossary.rst           |    3 +
>  doc/guides/prog_guide/qos_framework.rst      |   60 +-
>  doc/guides/prog_guide/traffic_management.rst |   13 +-
>  drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
>  examples/ip_pipeline/tmgr.c                  |  142 +--
>  examples/qos_sched/app_thread.c              |    1 -
>  examples/qos_sched/cfg_file.c                |  111 +-
>  examples/qos_sched/cfg_file.h                |    5 +
>  examples/qos_sched/init.c                    |   27 +-
>  examples/qos_sched/main.h                    |    3 +
>  examples/qos_sched/profile.cfg               |  196 ++--
>  lib/sched/meson.build                        |   10 +-
>  lib/sched/rte_pie.c                          |   86 ++
>  lib/sched/rte_pie.h                          |  398 +++++++
>  lib/sched/rte_sched.c                        |  240 ++--
>  lib/sched/rte_sched.h                        |   63 +-
>  lib/sched/version.map                        |    3 +
>  20 files changed, 2161 insertions(+), 276 deletions(-)  create mode 
> 100644 app/test/test_pie.c  create mode 100644 lib/sched/rte_pie.c  
> create mode 100644 lib/sched/rte_pie.h
> 
> --
> 2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v14 1/5] sched: add PIE based congestion management
  2021-10-15 13:51                               ` Dumitrescu, Cristian
@ 2021-10-19  9:34                                 ` Liguzinski, WojciechX
  0 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19  9:34 UTC (permalink / raw)
  To: Dumitrescu, Cristian, dev, Singh, Jasvinder; +Cc: Ajmera, Megha

Hi Cristian,

- As you asked I have added some points/one-liners in the patches what is changed
- Regarding usage of rte_sched_cman_params structure I will write you a separate message
- Reverted to use the RTE_SCHED_CMAN_RED flag
- Also changed WRED occurrences back to RED
- No, I wasn't getting a compilation error when struct rte_sched_cman_params was defined below its instantiation... But, to be certain I moved it above that place in code.
- version.map updated with a comment, like you suggested

Thanks for the review comments!

BR,
Wojtek

-----Original Message-----
From: Dumitrescu, Cristian <cristian.dumitrescu@intel.com> 
Sent: Friday, October 15, 2021 3:51 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>
Cc: Ajmera, Megha <megha.ajmera@intel.com>
Subject: RE: [PATCH v14 1/5] sched: add PIE based congestion management

Hi Wojciech,

Thank you for your patchset!

Can you please, at least starting from this version, add a short change log at the top of your file, just after the signoff line? It helps a lot during the review process, and you can find abundant examples in other patchsets from this email list. One line of description for every change would be nice, thank you.

> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Friday, October 15, 2021 9:16 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; 
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v14 1/5] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |  10 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 240 +++++++++-----
>  lib/sched/rte_sched.h                    |  63 +++-
>  lib/sched/version.map                    |   3 +
>  7 files changed, 701 insertions(+), 96 deletions(-)  create mode 
> 100644 lib/sched/rte_pie.c  create mode 100644 lib/sched/rte_pie.h
> 

<snip>

> diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
> index a858f61f95..a066eed186 100644
> --- a/lib/sched/rte_sched.c
> +++ b/lib/sched/rte_sched.c

<snip>

> @@ -183,8 +187,14 @@ struct rte_sched_subport {
>  	/* Pipe queues size */
>  	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> 
> -#ifdef RTE_SCHED_RED
> -	struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_CMAN
> +	enum rte_sched_cman_mode cman;
> +
> +	RTE_STD_C11
> +	union {
> +		struct rte_red_config
> red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +		struct rte_pie_config
> pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
>  #endif

Can you please use here the rte_sched_cman_params structure that you just created in rte_sched.h as opposed to inlining this structure. Yes, I agree it might have some ripple effect throughout this file, but I think it should be very limited, and also quick to do.

<snip>

> diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
> index c1a772b70c..07fcf439d8 100644
> --- a/lib/sched/rte_sched.h
> +++ b/lib/sched/rte_sched.h
> @@ -61,9 +61,10 @@ extern "C" {
>  #include <rte_mbuf.h>
>  #include <rte_meter.h>
> 
> -/** Random Early Detection (RED) */
> -#ifdef RTE_SCHED_RED
> +/** Congestion Management */
> +#ifdef RTE_SCHED_CMAN
>  #include "rte_red.h"
> +#include "rte_pie.h"
>  #endif
> 
>  /** Maximum number of queues per pipe.
> @@ -110,6 +111,28 @@ extern "C" {
>  #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
>  #endif
> 
> +/**
> + * Congestion Management (CMAN) mode
> + *
> + * This is used for controlling the admission of packets into a packet queue
> or
> + * group of packet queues on congestion.
> + *
> + * The *Random Early Detection (RED)* algorithm works by proactively
> dropping
> + * more and more input packets as the queue occupancy builds up. When
> the queue
> + * is full or almost full, RED effectively works as *tail drop*. The *Weighted
> + * RED* algorithm uses a separate set of RED thresholds for each packet
> color.
> + *
> + * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
> + * drops a packet at the onset of the congestion and tries to control the
> + * latency around the target value. The congestion detection, however, is
> based
> + * on the queueing latency instead of the queue length like RED. For more
> + * information, refer RFC8033.
> + */
> +enum rte_sched_cman_mode {
> +	RTE_SCHED_CMAN_WRED, /**< Weighted Random Early Detection
> (WRED) */

The algorithm is RED, not WRED. Let's stick to RTE_SCHED_CMAN_RED, please. The Weighted aspect comes into place when defining the struct rte_sched_cman_params::red_params as an array indexed by color below.

> +	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller
> Enhanced (PIE) */
> +};
> +
>  /*
>   * Pipe configuration parameters. The period and credits_per_period
>   * parameters are measured in bytes, with one byte meaning the time
> @@ -174,12 +197,30 @@ struct rte_sched_subport_params {
>  	/** Max allowed profiles in the pipe profile table */
>  	uint32_t n_max_pipe_profiles;
> 
> -#ifdef RTE_SCHED_RED
> -	/** RED parameters */
> -	struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +#ifdef RTE_SCHED_CMAN
> +	/** Congestion Management parameters */
> +	struct rte_sched_cman_params *cman_params;

You are instantiating struct rte_sched_cman_params here, but you are defining it just below, i.e. after you attempted to instantiate it. Aren't you getting a build error when compiling with RTE_SCHED_CMAN defined?

>  #endif
>  };
> 
> +#ifdef RTE_SCHED_CMAN
> +/*
> + * Congestion Management configuration parameters.
> + */
> +struct rte_sched_cman_params {
> +	/** Congestion Management mode */
> +	enum rte_sched_cman_mode cman_mode;
> +
> +	union {
> +		/** WRED parameters */

In the comment: RED parameters instead of WRED, please.

> +		struct rte_red_params
> red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
> +
> +		/** PIE parameters */
> +		struct rte_pie_params
> pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
> +	};
> +};
> +#endif
> +

<snip>

> diff --git a/lib/sched/version.map b/lib/sched/version.map
> index 53c337b143..54e5e96d4f 100644
> --- a/lib/sched/version.map
> +++ b/lib/sched/version.map
> @@ -30,4 +30,7 @@ EXPERIMENTAL {
>  	rte_sched_subport_pipe_profile_add;
>  	# added in 20.11
>  	rte_sched_port_subport_profile_add;
> +
> +	rte_pie_rt_data_init;
> +	rte_pie_config_init;

You need to put a comment about the release when these symbols got introduced, similar to above.

>  };
> --
> 2.25.1

Thank you for your work!

Regards,
Cristian

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v15 0/5] Add PIE support for HQoS library
  2021-10-19  8:18                             ` [dpdk-dev] [PATCH v15 " Liguzinski, WojciechX
                                                 ` (4 preceding siblings ...)
  2021-10-19  8:19                               ` [dpdk-dev] [PATCH v15 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-19 12:18                               ` Dumitrescu, Cristian
  2021-10-19 12:45                               ` [dpdk-dev] [PATCH v16 " Liguzinski, WojciechX
  6 siblings, 0 replies; 178+ messages in thread
From: Dumitrescu, Cristian @ 2021-10-19 12:18 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Singh, Jasvinder; +Cc: Ajmera, Megha



> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Tuesday, October 19, 2021 9:19 AM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v15 0/5] Add PIE support for HQoS library
> 
> DPDK sched library is equipped with mechanism that secures it from the
> bufferbloat problem
> which is a situation when excess buffers in the network cause high latency
> and latency
> variation. Currently, it supports RED for active queue management.
> However, more
> advanced queue management is required to address this problem and
> provide desirable
> quality of service to users.
> 
> This solution (RFC) proposes usage of new algorithm called "PIE"
> (Proportional Integral
> controller Enhanced) that can effectively and directly control queuing latency
> to address
> the bufferbloat problem.
> 
> The implementation of mentioned functionality includes modification of
> existing and
> adding a new set of data structures to the library, adding PIE related APIs.
> This affects structures in public API/ABI. That is why deprecation notice is
> going
> to be prepared and sent.
> 
> Liguzinski, WojciechX (5):
>   sched: add PIE based congestion management
>   example/qos_sched: add PIE support
>   example/ip_pipeline: add PIE support
>   doc/guides/prog_guide: added PIE
>   app/test: add tests for PIE
> 
>  app/test/meson.build                         |    4 +
>  app/test/test_pie.c                          | 1065 ++++++++++++++++++
>  config/rte_config.h                          |    1 -
>  doc/guides/prog_guide/glossary.rst           |    3 +
>  doc/guides/prog_guide/qos_framework.rst      |   60 +-
>  doc/guides/prog_guide/traffic_management.rst |   13 +-
>  drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
>  examples/ip_pipeline/tmgr.c                  |  142 +--
>  examples/qos_sched/app_thread.c              |    1 -
>  examples/qos_sched/cfg_file.c                |  127 ++-
>  examples/qos_sched/cfg_file.h                |    5 +
>  examples/qos_sched/init.c                    |   27 +-
>  examples/qos_sched/main.h                    |    3 +
>  examples/qos_sched/profile.cfg               |  196 ++--
>  lib/sched/meson.build                        |   10 +-
>  lib/sched/rte_pie.c                          |   86 ++
>  lib/sched/rte_pie.h                          |  398 +++++++
>  lib/sched/rte_sched.c                        |  241 ++--
>  lib/sched/rte_sched.h                        |   63 +-
>  lib/sched/version.map                        |    4 +
>  20 files changed, 2171 insertions(+), 284 deletions(-)
>  create mode 100644 app/test/test_pie.c
>  create mode 100644 lib/sched/rte_pie.c
>  create mode 100644 lib/sched/rte_pie.h
> 
> --
> 2.25.1

Series-acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v16 0/5] Add PIE support for HQoS library
  2021-10-19  8:18                             ` [dpdk-dev] [PATCH v15 " Liguzinski, WojciechX
                                                 ` (5 preceding siblings ...)
  2021-10-19 12:18                               ` [dpdk-dev] [PATCH v15 0/5] Add PIE support for HQoS library Dumitrescu, Cristian
@ 2021-10-19 12:45                               ` Liguzinski, WojciechX
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                                   ` (5 more replies)
  6 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19 12:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management. However, more
advanced queue management is required to address this problem and provide desirable
quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and
adding a new set of data structures to the library, adding PIE related APIs.
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   62 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  241 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    4 +
 20 files changed, 2172 insertions(+), 285 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v16 1/5] sched: add PIE based congestion management
  2021-10-19 12:45                               ` [dpdk-dev] [PATCH v16 " Liguzinski, WojciechX
@ 2021-10-19 12:45                                 ` Liguzinski, WojciechX
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19 12:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>

- Renamed WRED occurrences to RED
- Applied __rte_unused macro to parameters in rte_sched_port_pie_dequeue()
---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 241 +++++++++-----
 lib/sched/rte_sched.h                    |  63 +++-
 lib/sched/version.map                    |   4 +
 7 files changed, 703 insertions(+), 96 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ */
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
/**
 * PIE run-time data (one instance per queue)
 */
struct rte_pie {
	uint16_t active;               /**< Flag for activating/deactivating PIE */
	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
	uint64_t start_measurement;    /**< Start time of the measurement cycle (in cpu cycles) */
	uint64_t last_measurement;     /**< Time of last drop-probability update (in cpu cycles) */
	uint64_t qlen;                 /**< Queue length (packets count) */
	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
	uint32_t burst_allowance;      /**< Current burst allowance (in cpu cycles; set from max_burst) */
	uint64_t qdelay_old;           /**< Previous queue delay estimate (in cpu cycles) */
	double drop_prob;              /**< Current packet drop probability */
	double accu_prob;              /**< Accumulated packet drop probability */
};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
/**
 * @brief Decides if a new packet should be enqueued or dropped for a
 *        non-empty queue
 *
 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
 * @param pie [in,out] data pointer to PIE runtime data
 * @param pkt_len [in] packet length in bytes
 * @param time [in] current time (measured in cpu cycles)
 *
 * @return Operation status
 * @retval 0 enqueue the packet
 * @retval 1 drop the packet based on max threshold criterion
 * @retval 2 drop the packet based on mark probability criterion
 */
static inline int
__rte_experimental
rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
	struct rte_pie *pie,
	uint32_t pkt_len,
	const uint64_t time)
{
	/* Tail drop: queue already at the configured packet threshold */
	if (pie->qlen >= pie_cfg->tailq_th) {

		pie->accu_prob = 0;
		return 1;
	}

	if (pie->active) {
		/* Update drop probability after certain interval */
		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
			_calc_drop_probability(pie_cfg, pie, time);

		/* Decide whether packet to be dropped or enqueued.
		 * NOTE(review): while burst_allowance > 0 the drop verdict
		 * is ignored, but _rte_pie_drop() has already advanced
		 * accu_prob state - confirm this matches the intended
		 * RFC 8033 burst-protection behaviour.
		 */
		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
			return 2;
	}

	/* When queue occupancy rises above 10% of the tail drop threshold,
	 * turn on PIE and start a fresh measurement cycle.
	 */
	if ((pie->active == 0) &&
		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
		pie->active = 1;
		pie->qdelay_old = 0;
		pie->drop_prob = 0;
		pie->in_measurement = 1;
		pie->departed_bytes_count = 0;
		pie->avg_dq_time = 0;
		pie->last_measurement = time;
		pie->burst_allowance = pie_cfg->max_burst;
		pie->accu_prob = 0;
		pie->start_measurement = time;
	}

	/* when queue has been idle for a while, turn off PIE and Reset counters */
	if (pie->active == 1 &&
		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
		pie->active =  0;
		pie->in_measurement = 0;
	}

	/* Update PIE qlen parameter: this packet is being enqueued */
	pie->qlen++;
	pie->qlen_bytes += pkt_len;

	/* No drop */
	return 0;
}
+
+/**
+ * @brief Decides if new packet should be enqeued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probility criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..0db5335bb6 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,14 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1088,90 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_RED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->cman_params->cman_mode == RTE_SCHED_CMAN_RED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,30 +1794,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1821,93 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1916,17 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
+	uint32_t qindex __rte_unused,
+	uint32_t pkt_len __rte_unused,
+	uint64_t time __rte_unused) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2022,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2495,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2515,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..72728e1751 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -139,6 +162,24 @@ struct rte_sched_pipe_params {
 	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 };
 
+#ifdef RTE_SCHED_CMAN
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+#endif
+
 /*
  * Subport configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +215,9 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion Management parameters */
+	struct rte_sched_cman_params *cman_params;
 #endif
 };
 
@@ -208,10 +249,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +261,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index 53c337b143..ffa5f7b10d 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,8 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v16 2/5] example/qos_sched: add PIE support
  2021-10-19 12:45                               ` [dpdk-dev] [PATCH v16 " Liguzinski, WojciechX
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-19 12:45                                 ` Liguzinski, WojciechX
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19 12:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling either PIE or RED congestion
management, selected by parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>

---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   | 127 +++++++++++++++------
 examples/qos_sched/cfg_file.h   |   5 +
 examples/qos_sched/init.c       |  27 +++--
 examples/qos_sched/main.h       |   3 +
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 7 files changed, 250 insertions(+), 110 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..450482f07d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_RED) {
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_RED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_RED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
-			/* Parse WRED min thresholds */
-			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			/* Parse RED min thresholds */
+			snprintf(str, sizeof(str), "tc %d red min", i);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -268,14 +303,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED max thresholds */
-			snprintf(str, sizeof(str), "tc %d wred max", i);
+			/* Parse RED max thresholds */
+			snprintf(str, sizeof(str), "tc %d red max", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -283,14 +318,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED inverse mark probabilities */
-			snprintf(str, sizeof(str), "tc %d wred inv prob", i);
+			/* Parse RED inverse mark probabilities */
+			snprintf(str, sizeof(str), "tc %d red inv prob", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -299,14 +334,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED EWMA filter weights */
-			snprintf(str, sizeof(str), "tc %d wred weight", i);
+			/* Parse RED EWMA filter weights */
+			snprintf(str, sizeof(str), "tc %d red weight", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..d151da5c2d 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -204,15 +204,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_RED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +273,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v16 3/5] example/ip_pipeline: add PIE support
  2021-10-19 12:45                               ` [dpdk-dev] [PATCH v16 " Liguzinski, WojciechX
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-19 12:45                                 ` Liguzinski, WojciechX
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19 12:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v16 4/5] doc/guides/prog_guide: added PIE
  2021-10-19 12:45                               ` [dpdk-dev] [PATCH v16 " Liguzinski, WojciechX
                                                   ` (2 preceding siblings ...)
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-19 12:45                                 ` Liguzinski, WojciechX
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-10-20  7:49                                 ` [dpdk-dev] [PATCH v17 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19 12:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>

- Corrected marker for Droppers title
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 62 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 +++-
 3 files changed, 67 insertions(+), 11 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..84826c7d2d 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
--------
+Droppers
+--------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample, but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+These parameters are specified in the format expected by the dropper module API.
+They could be made self-calculated for fine tuning within the apps.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. Calculated drop probability is updated periodically,
+based on latency measured and desired and whether the queuing latency is currently
+trending up or down. Queuing latency can be obtained using direct measurement or
+estimated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v16 5/5] app/test: add tests for PIE
  2021-10-19 12:45                               ` [dpdk-dev] [PATCH v16 " Liguzinski, WojciechX
                                                   ` (3 preceding siblings ...)
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-19 12:45                                 ` Liguzinski, WojciechX
  2021-10-20  7:49                                 ` [dpdk-dev] [PATCH v17 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-19 12:45 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/meson.build |    4 +
 app/test/test_pie.c  | 1065 ++++++++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c  |    6 +-
 lib/sched/rte_pie.h  |   17 +-
 4 files changed, 1085 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/meson.build b/app/test/meson.build
index a16374b7a1..a189b4ebd3 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -114,6 +114,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -245,6 +246,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -296,6 +298,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -309,6 +312,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/** Structures for testing rte_pie performance and functionality */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
/**
 * Current time expressed as a byte count at the modelled port speed:
 * the TSC value scaled by the factor precomputed in init_port_ts().
 */
static uint64_t get_port_ts(void)
{
	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
/* Record the starting TSC value for one measured interval. */
static inline void rdtsc_prof_start(struct rdtsc_prof *p)
{
	p->clk_start = rte_rdtsc_precise();
}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
/**
 * Test helper: read the PIE activation flag (pie->active).
 *
 * @param pie_cfg unused; present to mirror the other accessor signatures
 * @param pie [in] PIE runtime data
 * @return the current value of the activation flag
 */
static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
				    struct rte_pie *pie)
{
	/* Flag for activating/deactivating pie */
	RTE_SET_USED(pie_cfg);
	return pie->active;
}
+
/**
 * Test helper: set the PIE activation flag (pie->active).
 *
 * @param pie_cfg unused; present to mirror the other accessor signatures
 * @param pie [in,out] PIE runtime data
 * @param active new value of the activation flag
 */
static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
					struct rte_pie *pie,
					uint16_t active)
{
	/* Flag for activating/deactivating pie */
	RTE_SET_USED(pie_cfg);
	pie->active = active;
}
+
/**
 * Test helper: read the current packet drop probability (pie->drop_prob).
 *
 * @param pie_cfg unused; present to mirror the other accessor signatures
 * @param pie [in] PIE runtime data
 * @return the current drop probability
 */
static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
				    struct rte_pie *pie)
{
	RTE_SET_USED(pie_cfg);
	return pie->drop_prob;
}
+
/**
 * Test helper: read the average dequeue time estimate (pie->avg_dq_time).
 * The original comment said "drop probability" - a copy/paste error.
 * NOTE(review): rte_pie.h right-shifts avg_dq_time, so it is an integral
 * cycle count widened to double here; func_test2 stores the result in a
 * uint32_t - confirm the truncation there is intended.
 */
static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
				    struct rte_pie *pie)
{
	RTE_SET_USED(pie_cfg);
	return pie->avg_dq_time;
}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
/**
 * Check that the measured drop rate matches the drop probability within
 * the given tolerance (tolerance is a percentage of drop_prob).
 *
 * @param diff [out] relative difference in percent (0 when the rates match)
 * @param drop_rate measured drop rate
 * @param drop_prob expected drop probability
 * @param tolerance allowed relative difference, in percent
 * @return 1 when within tolerance, 0 otherwise
 */
static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
							double tolerance)
{
	double abs_diff = fabs(drop_rate - drop_prob);
	int ret = 1;

	/* Compare against a small epsilon instead of truncating to int:
	 * the original "(int)abs_diff == 0" held for every difference below
	 * 1.0, so all probability-sized differences passed unconditionally.
	 */
	if (abs_diff < 1e-9) {
		*diff = 0.0;
	} else {
		*diff = (abs_diff / drop_prob) * 100.0;
		if (*diff > tolerance)
			ret = 0;
	}
	return ret;
}
+
/**
 * Initialize every PIE config in the test set and zero the shared counters.
 *
 * @param tcfg [in,out] test configuration to initialize
 * @return PASS on success, FAIL when any rte_pie_config_init() call fails
 */
static enum test_result
test_rte_pie_init(struct test_config *tcfg)
{
	unsigned int i = 0;

	/* the real CPU frequency drives the byte-time scale in get_port_ts() */
	tcfg->tvar->clk_freq = rte_get_timer_hz();
	init_port_ts(tcfg->tvar->clk_freq);

	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
					(uint16_t)tcfg->tconfig->qdelay_ref,
					(uint16_t)tcfg->tconfig->dp_update_interval[i],
					(uint16_t)tcfg->tconfig->max_burst[i],
					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
			return FAIL;
		}
	}

	/* reset the shared counters (pointers into static arrays) */
	*tcfg->tqueue->qlen = 0;
	*tcfg->tvar->dropped = 0;
	*tcfg->tvar->enqueued = 0;

	return PASS;
}
+
/**
 * Try up to 'attempts' times to enqueue a packet of 'pkt_len' bytes;
 * succeed as soon as one packet is accepted.
 *
 * @return 0 when a packet was accepted, -1 when every attempt was dropped
 */
static int
increase_qsize(struct rte_pie_config *pie_cfg,
				struct rte_pie *pie,
				uint32_t *qlen,
				uint32_t pkt_len,
				uint32_t attempts)
{
	uint32_t attempt;

	for (attempt = 0; attempt < attempts; attempt++) {
		/* a zero return means the packet was accepted */
		if (rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len,
					get_port_ts()) == 0)
			return 0;
	}

	/* no packet was accepted */
	return -1;
}
+
/**
 * Functional-test driver: enqueue 'num_ops' fixed-size packets and count
 * how many were accepted vs dropped.
 */
static void
enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
					struct rte_pie *pie,
					uint32_t *qlen,
					uint32_t num_ops,
					uint32_t *enqueued,
					uint32_t *dropped)
{
	uint32_t op;

	for (op = 0; op < num_ops; op++) {
		/* each packet is either accepted or dropped */
		if (rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
					get_port_ts()) == 0)
			(*enqueued)++;
		else
			(*dropped)++;
	}
}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
/**
 * Functional test 2: run every PIE configuration over the same queue level
 * and check the measured drop rate against the reported drop probability
 * within the configured tolerance.
 *
 * @param tcfg [in,out] test configuration
 * @return PASS when every configuration stays within tolerance
 */
static enum test_result func_test2(struct test_config *tcfg)
{
	enum test_result result = PASS;
	uint32_t i = 0;

	printf("%s", tcfg->msg);

	printf("%s", tcfg->htxt);

	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
		/* NOTE(review): avg is uint32_t but the getter returns double
		 * (pie->avg_dq_time) - confirm the truncation is intended.
		 */
		uint32_t avg = 0;
		double drop_rate = 0.0;
		double drop_prob = 0.0;
		double diff = 0.0;

		if (test_rte_pie_init(tcfg) != PASS) {
			result = FAIL;
			goto out;
		}

		/* fresh runtime state per configuration */
		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
		*tcfg->tvar->enqueued = 0;
		*tcfg->tvar->dropped = 0;

		if (increase_qsize(&tcfg->tconfig->pconfig[i],
					tcfg->tqueue->pdata_in,
					tcfg->tqueue->qlen,
					*tcfg->tlevel,
					tcfg->tqueue->q_ramp_up) != 0) {
			result = FAIL;
			goto out;
		}

		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
				     tcfg->tqueue->pdata_in,
				     tcfg->tqueue->qlen,
				     tcfg->tvar->num_ops,
				     tcfg->tvar->enqueued,
				     tcfg->tvar->dropped);

		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);

		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
							*tcfg->tvar->dropped);
		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);

		if (!check_drop_rate(&diff, drop_rate, drop_prob,
				 (double)tcfg->tqueue->drop_tolerance)) {
			fprintf(stderr, "Fail: drop rate outside tolerance\n");
			result = FAIL;
		}

		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
				drop_prob * 100.0, drop_rate * 100.0, diff,
				(double)tcfg->tqueue->drop_tolerance);
	}
out:
	return result;
}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
/**
 * Performance driver: enqueue (and, when pie_out is non-NULL, also dequeue)
 * 'num_ops' packets while profiling each library call, mirroring the
 * rte_sched_port_enqueue()/rte_sched_port_dequeue() usage pattern.
 */
static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
				 struct rte_pie *pie_in,
				 struct rte_pie *pie_out,
				 uint32_t *qlen,
				 uint32_t num_ops,
				 uint32_t *enqueued,
				 uint32_t *dropped,
				 uint32_t *dequeued,
				 struct rdtsc_prof *prof)
{
	uint32_t op;

	if (pie_cfg == NULL) {
		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
		return;
	}

	if (pie_in == NULL) {
		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
		return;
	}

	for (op = 0; op < num_ops; op++) {
		uint64_t ts;
		int accepted;

		/* profile only the enqueue call itself */
		ts = get_port_ts();
		rdtsc_prof_start(prof);
		accepted = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
								1000*sizeof(uint32_t), ts);
		rdtsc_prof_end(prof);

		if (accepted == 0)
			(*enqueued)++;
		else
			(*dropped)++;

		/* a NULL pie_out selects the enqueue-only benchmark */
		if (pie_out != NULL) {
			ts = get_port_ts();
			rdtsc_prof_start(prof);
			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
			rdtsc_prof_end(prof);

			(*dequeued)++;
		}
	}
}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
/**
 * Performance test P1: measure enqueue cost with one PIE configuration.
 *
 * @param tcfg [in,out] test configuration
 * @return PASS unless initialization fails (the measurement itself
 *         produces no pass/fail verdict)
 */
static enum test_result perf_test(struct test_config *tcfg)
{
	enum test_result result = PASS;
	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
	uint32_t total = 0;

	printf("%s", tcfg->msg);

	rdtsc_prof_init(&prof, "enqueue");

	if (test_rte_pie_init(tcfg) != PASS) {
		result = FAIL;
		goto out;
	}

	/**
	 * initialize the rte_pie run time data structure
	 */
	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
	*tcfg->tvar->enqueued = 0;
	*tcfg->tvar->dropped = 0;

	/* pie_out is NULL: enqueue-only run. perf1_tvar leaves .dequeued
	 * unset (NULL), but enqueue_dequeue_perf() only dereferences it
	 * when pie_out != NULL, so passing it here is safe.
	 */
	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
			     tcfg->tqueue->pdata_in,
				 NULL,
			     tcfg->tqueue->qlen,
			     tcfg->tvar->num_ops,
			     tcfg->tvar->enqueued,
			     tcfg->tvar->dropped,
				 tcfg->tvar->dequeued,
			     &prof);

	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;

	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
			total, *tcfg->tvar->enqueued,
			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
			*tcfg->tvar->dropped,
			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);

	rdtsc_prof_print(&prof);
out:
	return result;
}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
/**
 * Performance test P2: measure enqueue plus dequeue cost with one PIE
 * configuration. Both calls accumulate into the same profile.
 * NOTE(review): the profile is labelled "enqueue" but also collects
 * dequeue timings - consider a separate profile or label.
 *
 * @param tcfg [in,out] test configuration
 * @return PASS unless initialization fails
 */
static enum test_result perf_test2(struct test_config *tcfg)
{
	enum test_result result = PASS;
	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
	uint32_t total = 0;

	printf("%s", tcfg->msg);

	rdtsc_prof_init(&prof, "enqueue");

	if (test_rte_pie_init(tcfg) != PASS) {
		result = FAIL;
		goto out;
	}

	/**
	 * initialize the rte_pie run time data structure
	 */
	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
	*tcfg->tvar->enqueued = 0;
	*tcfg->tvar->dequeued = 0;
	*tcfg->tvar->dropped = 0;

	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
				 tcfg->tqueue->pdata_in,
				 tcfg->tqueue->pdata_out,
				 tcfg->tqueue->qlen,
				 tcfg->tvar->num_ops,
				 tcfg->tvar->enqueued,
				 tcfg->tvar->dropped,
				 tcfg->tvar->dequeued,
				 &prof);

	/* total counts enqueue outcomes; dequeues are reported against it */
	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;

	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
			total, *tcfg->tvar->dequeued,
			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
			*tcfg->tvar->dropped,
			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);

	rdtsc_prof_print(&prof);
out:
	return result;
}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
/* Print the pass/fail summary; the fail column is shown only when needed. */
static void
show_stats(const uint32_t num_tests, const uint32_t num_pass)
{
	const uint32_t num_fail = num_tests - num_pass;

	if (num_fail == 0)
		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
	else
		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
		       num_fail);
}
+
/* Map the pass count to a process exit code: 0 when everything passed. */
static int
tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
{
	if (num_pass == num_tests)
		return 0;
	return 1;
}
+
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v17 0/5] Add PIE support for HQoS library
  2021-10-19 12:45                               ` [dpdk-dev] [PATCH v16 " Liguzinski, WojciechX
                                                   ` (4 preceding siblings ...)
  2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-20  7:49                                 ` Liguzinski, WojciechX
  2021-10-20  7:49                                   ` [dpdk-dev] [PATCH v17 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                                     ` (5 more replies)
  5 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-20  7:49 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

The DPDK sched library is equipped with a mechanism that protects it from the bufferbloat problem,
which is a situation in which excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management. However, more
advanced queue management is required to address this problem and provide desirable
quality of service to users.

This solution (RFC) proposes the use of a new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and
adding a new set of data structures to the library, adding PIE related APIs.
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/app_thread.c              |    1 -
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |   10 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  241 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    4 +
 20 files changed, 2173 insertions(+), 286 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1

Series-acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v17 1/5] sched: add PIE based congestion management
  2021-10-20  7:49                                 ` [dpdk-dev] [PATCH v17 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-10-20  7:49                                   ` Liguzinski, WojciechX
  2021-10-20  7:49                                   ` [dpdk-dev] [PATCH v17 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-20  7:49 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE-based congestion management as described in RFC 8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
--
Changes in V17:
- Corrected paragraph link naming in qos_framework.rst to fix CI builds

Changes in V16:
- Fixed 'title underline too short' error in qos_framework.rst
- Applied __rte_unused macro to parameters in rte_sched_port_pie_dequeue()

---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |  10 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 241 +++++++++-----
 lib/sched/rte_sched.h                    |  63 +++-
 lib/sched/version.map                    |   4 +
 7 files changed, 703 insertions(+), 96 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index b24f7b8775..e7ae9bcf19 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -1,11 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
-headers = files(
-        'rte_approx.h',
-        'rte_red.h',
-        'rte_sched.h',
-        'rte_sched_common.h',
-)
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
+headers = files('rte_sched.h', 'rte_sched_common.h',
+		'rte_red.h', 'rte_approx.h', 'rte_pie.h')
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL) {
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+			/* Bail out: writing through a NULL pie below would crash */
+			return;
+		}
+		/* NOTE(review): the allocation above is only reachable through
+		 * the local pointer, so the caller never sees it and the memory
+		 * is leaked - consider removing the allocation altogether.
+		 */
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is an integer, not a pointer: assert it is non-zero, not non-NULL */
+	RTE_ASSERT(pkt_len != 0);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Decrement burst allowance, guarding against unsigned underflow:
+	 * the previous subtract-then-test wrapped around when the allowance
+	 * dropped below dp_update_interval, making (x > 0) always true.
+	 */
+	pie->burst_allowance = (pie->burst_allowance > pie_cfg->dp_update_interval) ?
+		pie->burst_allowance - pie_cfg->dp_update_interval : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* The previous integer division rte_rand()/RTE_RAND_MAX truncated to 0,
+	 * so the comparison below dropped every packet reaching this point.
+	 * Scale the random value to a double in [0, 1] instead.
+	 */
+	rand_value = (double)rte_rand() / (double)RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on drop probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..0db5335bb6 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,14 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1088,90 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_RED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->cman_params->cman_mode == RTE_SCHED_CMAN_RED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,30 +1794,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1821,93 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1916,17 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
+	uint32_t qindex __rte_unused,
+	uint32_t pkt_len __rte_unused,
+	uint64_t time __rte_unused) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2022,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2495,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2515,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..72728e1751 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -139,6 +162,24 @@ struct rte_sched_pipe_params {
 	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 };
 
+#ifdef RTE_SCHED_CMAN
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+#endif
+
 /*
  * Subport configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +215,9 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion Management parameters */
+	struct rte_sched_cman_params *cman_params;
 #endif
 };
 
@@ -208,10 +249,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +261,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index 53c337b143..ffa5f7b10d 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,8 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v17 2/5] example/qos_sched: add PIE support
  2021-10-20  7:49                                 ` [dpdk-dev] [PATCH v17 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-20  7:49                                   ` [dpdk-dev] [PATCH v17 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-20  7:49                                   ` Liguzinski, WojciechX
  2021-10-20 15:11                                     ` Stephen Hemminger
  2021-10-20  7:50                                   ` [dpdk-dev] [PATCH v17 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-20  7:49 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling PIE or RED by
parsing the config file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h             |   1 -
 examples/qos_sched/app_thread.c |   1 -
 examples/qos_sched/cfg_file.c   | 127 +++++++++++++++------
 examples/qos_sched/cfg_file.h   |   5 +
 examples/qos_sched/init.c       |  27 +++--
 examples/qos_sched/main.h       |   3 +
 examples/qos_sched/profile.cfg  | 196 +++++++++++++++++++++-----------
 7 files changed, 250 insertions(+), 110 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 590903c07d..48132f27df 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..895c0d3592 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
 		if (likely(nb_pkt)) {
 			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
 					nb_pkt);
-
 			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
 			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
 		}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..450482f07d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_RED) {
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_RED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_RED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
-			/* Parse WRED min thresholds */
-			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			/* Parse RED min thresholds */
+			snprintf(str, sizeof(str), "tc %d red min", i);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -268,14 +303,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED max thresholds */
-			snprintf(str, sizeof(str), "tc %d wred max", i);
+			/* Parse RED max thresholds */
+			snprintf(str, sizeof(str), "tc %d red max", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -283,14 +318,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED inverse mark probabilities */
-			snprintf(str, sizeof(str), "tc %d wred inv prob", i);
+			/* Parse RED inverse mark probabilities */
+			snprintf(str, sizeof(str), "tc %d red inv prob", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -299,14 +334,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED EWMA filter weights */
-			snprintf(str, sizeof(str), "tc %d wred weight", i);
+			/* Parse RED EWMA filter weights */
+			snprintf(str, sizeof(str), "tc %d red weight", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 1abe003fc6..d151da5c2d 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -204,15 +204,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_RED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -279,7 +273,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v17 3/5] example/ip_pipeline: add PIE support
  2021-10-20  7:49                                 ` [dpdk-dev] [PATCH v17 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-20  7:49                                   ` [dpdk-dev] [PATCH v17 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-20  7:49                                   ` [dpdk-dev] [PATCH v17 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-20  7:50                                   ` Liguzinski, WojciechX
  2021-10-20  7:50                                   ` [dpdk-dev] [PATCH v17 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-20  7:50 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Add PIE support to the IP Pipeline example.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v17 4/5] doc/guides/prog_guide: added PIE
  2021-10-20  7:49                                 ` [dpdk-dev] [PATCH v17 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                     ` (2 preceding siblings ...)
  2021-10-20  7:50                                   ` [dpdk-dev] [PATCH v17 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-20  7:50                                   ` Liguzinski, WojciechX
  2021-10-20  7:50                                   ` [dpdk-dev] [PATCH v17 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-10-25 11:32                                   ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-20  7:50 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE-related information to the documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 64 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 +++-
 3 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c37b78804 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
--------
+Droppers
+--------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample, but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+These parameters are specified in the format accepted by the dropper module API.
+They could be made self-calculated within the applications, for fine tuning.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
@@ -1396,7 +1440,7 @@ As can be seen, the floating-point implementation achieved the worst performance
    | Method                                                                             | Relative Performance |
    |                                                                                    |                      |
    +====================================================================================+======================+
-   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Dropper>`)                   | 100%                 |
+   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Droppers>`)                  | 100%                 |
    |                                                                                    |                      |
    +------------------------------------------------------------------------------------+----------------------+
    | Fixed-point method with small (512B) look-up table                                 | 148%                 |
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED) and Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and the desired latency, and on whether the queuing latency
+is currently trending up or down. Queuing latency can be obtained using direct
+measurement or estimated from the queue length and dequeue rate. The random drop
+decision is made on a packet's arrival, before it is enqueued into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v17 5/5] app/test: add tests for PIE
  2021-10-20  7:49                                 ` [dpdk-dev] [PATCH v17 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                     ` (3 preceding siblings ...)
  2021-10-20  7:50                                   ` [dpdk-dev] [PATCH v17 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-20  7:50                                   ` Liguzinski, WojciechX
  2021-10-25 11:32                                   ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-20  7:50 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/meson.build |    4 +
 app/test/test_pie.c  | 1065 ++++++++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c  |    6 +-
 lib/sched/rte_pie.h  |   17 +-
 4 files changed, 1085 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/meson.build b/app/test/meson.build
index a16374b7a1..a189b4ebd3 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -114,6 +114,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -245,6 +246,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -296,6 +298,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -309,6 +312,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ *
+ * Runs num_ops timed enqueue operations against pie_in, and (only when
+ * pie_out is non-NULL) one timed dequeue per iteration against pie_out.
+ * Each rte_pie_enqueue()/rte_pie_dequeue() call is bracketed by the
+ * rdtsc profiler; results are accumulated into *enqueued/*dropped/
+ * *dequeued.  Returns early (without counting anything) if pie_cfg or
+ * pie_in is NULL.
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		/* Fixed synthetic packet length: 1000 * sizeof(uint32_t) bytes */
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		/* Optional dequeue leg, used only by performance test 2 */
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ *
+ * P1 measures enqueue only: perf1_tvar carries no dequeued counter and
+ * the referenced pt_tqueue has no pdata_out.
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ * Initializes the PIE configuration and runtime data from tcfg, runs
+ * the timed enqueue loop (no dequeue leg), then prints totals,
+ * percentages and the rdtsc profile.
+ *
+ * @return PASS on success, FAIL if test_rte_pie_init() fails.
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof)
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	/* NOTE(review): assumes total > 0 (num_ops is 30000 in practice);
+	 * a zero total would divide by zero below - confirm num_ops can
+	 * never be configured to 0.
+	 */
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ *
+ * P2 exercises both enqueue and dequeue: perf2_tvar adds a dequeued
+ * counter and the referenced pt_tqueue2 provides pdata_out.
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ * Same flow as perf_test() but passes pdata_out so each iteration also
+ * runs a timed dequeue.
+ *
+ * @return PASS on success, FAIL if test_rte_pie_init() fails.
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	/* NOTE(review): 'dequeued' percentage is computed against
+	 * enqueued + dropped, not against a dequeue total - confirm this
+	 * is the intended denominator.
+	 */
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ * (subset used by the pie_autotest command)
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ * (full functional set used by pie_all)
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+/* performance tests, used by pie_perf and pie_all */
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ *
+ * Runs test_count entries of test_type in order, printing a banner
+ * before each test and a <pass>/<fail> banner after it.  Increments
+ * *num_tests per test executed and *num_pass per test returning PASS.
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ *
+ * @return 0 if every invalid call was rejected, -1 otherwise.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NOTE(review): the messages below use "%i%s" with no separator,
+	 * unlike the "%i: %s" above - likely an unintended inconsistency.
+	 */
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+/* Print a one-line summary of test results; the fail count is only
+ * shown when at least one test failed.
+ */
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_tests - num_pass);
+}
+
+/* Map the test counters to a command result: 0 if all passed, 1 otherwise. */
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	return (num_pass == num_tests) ? 0 : 1;
+}
+
+/* Entry point for the pie_autotest command: invalid-parameter checks
+ * plus the quick functional test set.
+ */
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* Entry point for the pie_perf command: performance tests only
+ * (no invalid-parameter checks).
+ */
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* Entry point for the pie_all command: invalid-parameter checks, the
+ * full functional test set, then the performance tests.
+ */
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v17 2/5] example/qos_sched: add PIE support
  2021-10-20  7:49                                   ` [dpdk-dev] [PATCH v17 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-20 15:11                                     ` Stephen Hemminger
  2021-10-20 18:28                                       ` Liguzinski, WojciechX
  0 siblings, 1 reply; 178+ messages in thread
From: Stephen Hemminger @ 2021-10-20 15:11 UTC (permalink / raw)
  To: Liguzinski, WojciechX
  Cc: dev, jasvinder.singh, cristian.dumitrescu, megha.ajmera

On Wed, 20 Oct 2021 07:49:59 +0000
"Liguzinski, WojciechX" <wojciechx.liguzinski@intel.com> wrote:

> diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
> index dbc878b553..895c0d3592 100644
> --- a/examples/qos_sched/app_thread.c
> +++ b/examples/qos_sched/app_thread.c
> @@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
>  		if (likely(nb_pkt)) {
>  			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
>  					nb_pkt);
> -
>  			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
>  			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
>  		}

Unnecessary whitespace change, drop this in next revision?

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v17 2/5] example/qos_sched: add PIE support
  2021-10-20 15:11                                     ` Stephen Hemminger
@ 2021-10-20 18:28                                       ` Liguzinski, WojciechX
  0 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-20 18:28 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Singh, Jasvinder, Dumitrescu, Cristian, Ajmera, Megha

Hi Stephen,

Sure, no problem with that.
I probably changed that line together with something else in some earlier version and didn't notice it now.

Thanks,
Wojtek

-----Original Message-----
From: Stephen Hemminger <stephen@networkplumber.org> 
Sent: Wednesday, October 20, 2021 5:11 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
Cc: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Ajmera, Megha <megha.ajmera@intel.com>
Subject: Re: [dpdk-dev] [PATCH v17 2/5] example/qos_sched: add PIE support

On Wed, 20 Oct 2021 07:49:59 +0000
"Liguzinski, WojciechX" <wojciechx.liguzinski@intel.com> wrote:

> diff --git a/examples/qos_sched/app_thread.c 
> b/examples/qos_sched/app_thread.c index dbc878b553..895c0d3592 100644
> --- a/examples/qos_sched/app_thread.c
> +++ b/examples/qos_sched/app_thread.c
> @@ -205,7 +205,6 @@ app_worker_thread(struct thread_conf **confs)
>  		if (likely(nb_pkt)) {
>  			int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
>  					nb_pkt);
> -
>  			APP_STATS_ADD(conf->stat.nb_drop, nb_pkt - nb_sent);
>  			APP_STATS_ADD(conf->stat.nb_rx, nb_pkt);
>  		}

Unnecessary whitespace change, drop this in next revision?

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library
  2021-10-20  7:49                                 ` [dpdk-dev] [PATCH v17 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                     ` (4 preceding siblings ...)
  2021-10-20  7:50                                   ` [dpdk-dev] [PATCH v17 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-25 11:32                                   ` Liguzinski, WojciechX
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                                       ` (6 more replies)
  5 siblings, 7 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-25 11:32 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management. However, more
advanced queue management is required to address this problem and provide desirable
quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and
adding a new set of data structures to the library, adding PIE related APIs.
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  241 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    4 +
 19 files changed, 2172 insertions(+), 279 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1

Series-acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v18 1/5] sched: add PIE based congestion management
  2021-10-25 11:32                                   ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-10-25 11:32                                     ` Liguzinski, WojciechX
  2021-10-26 21:07                                       ` Singh, Jasvinder
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                                       ` (5 subsequent siblings)
  6 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-25 11:32 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management, as described in RFC 8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
--
Changes in V18:
- Resolved merge conflict in lib/sched/meson.build after rebasing ontop of main
- Reverted whitespace change in app_thread.c - comment from Stephen Hemminger

Changes in V17:
- Corrected paragraph link naming in qos_framework.rst to fix CI builds

Changes in V16:
- Fixed 'title underline too short' error in qos_framework.rst
- Applied __rte_unused macro to parameters in rte_sched_port_pie_dequeue()

---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |   3 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 241 +++++++++-----
 lib/sched/rte_sched.h                    |  63 +++-
 lib/sched/version.map                    |   4 +
 7 files changed, 702 insertions(+), 90 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index 8ced4547aa..df75db51ed 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -7,11 +7,12 @@ if is_windows
     subdir_done()
 endif
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
 headers = files(
         'rte_approx.h',
         'rte_red.h',
         'rte_sched.h',
         'rte_sched_common.h',
+        'rte_pie.h',
 )
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+/* Reset all PIE runtime fields to their initial (inactive) state.
+ *
+ * Fix: the original logged the rte_malloc() failure but then fell
+ * through and dereferenced the NULL pointer (pie->active = 0, ...),
+ * crashing.  Return early instead.
+ */
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL) {
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+			return;
+		}
+		/* NOTE(review): 'pie' is a local copy of the caller's
+		 * pointer, so this allocation is never visible to (or
+		 * freeable by) the caller - confirm whether the allocation
+		 * path is needed at all.
+		 */
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+/* Validate the user-supplied PIE parameters (milliseconds) and convert
+ * them to CPU cycles in pie_cfg.  Returns 0 on success, -1 for a NULL
+ * config pointer, -EINVAL for a zero parameter.
+ */
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	/* All parameters are unsigned, so each "<= 0" test below is
+	 * effectively an "== 0" test.
+	 */
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	/* milliseconds -> CPU cycles; tailq_th stays in packets */
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet (always, for an empty queue)
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is an integer, not a pointer: the original asserted
+	 * pkt_len != NULL, which compares an integer against a pointer
+	 * constant.  Assert on a non-zero length instead.
+	 */
+	RTE_ASSERT(pkt_len != 0);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Periodically update the PIE drop probability
+ *
+ * Recomputes drop_prob from the current and previous queue-delay
+ * estimates and decrements the burst allowance.  (The original @brief
+ * described a drop/enqueue decision; that decision is made by
+ * _rte_pie_drop(), not here.)
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	/* Auto-tune the adjustment: scale p down while drop_prob is small */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Decrement the burst allowance, clamping at zero.  The original
+	 * unsigned subtraction wrapped around when dp_update_interval
+	 * exceeded the remaining allowance, and the subsequent
+	 * (burst_allowance > 0) test on an unsigned value was always
+	 * true after a wrap, leaving a huge bogus allowance.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Compare a uniform random number in [0, 1] against drop_prob.
+	 * The original computed rte_rand()/RTE_RAND_MAX in integer
+	 * arithmetic, which truncates to 0 (or at most 1) before the
+	 * comparison, so the random-drop branch effectively never fired.
+	 * Divide in floating point instead.
+	 */
+	if (((double)rte_rand() / (double)RTE_RAND_MAX) < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet (the non-empty path returns 1 for tail
+ *            drop and 2 for probabilistic drop)
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* Dispatch on current queue occupancy; the empty path never drops */
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * Maintains an exponentially weighted average of the time needed to
+ * dequeue RTE_DQ_THRESHOLD bytes; measurement cycles start whenever
+ * enough bytes are queued and end once enough bytes have departed.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			/* First sample seeds the average; later samples are
+			 * blended with weight RTE_DQ_WEIGHT.
+			 */
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..0db5335bb6 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,14 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1088,90 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t tc, color;
+
+	for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
+		for (color = 0; color < RTE_COLORS; color++) {
+			struct rte_red_params *red =
+				&params->cman_params->red_params[tc][color];
+
+			/* if min/max are both zero, then RED is disabled */
+			if ((red->min_th | red->max_th) == 0)
+				continue;
+
+			if (rte_red_config_init(&s->red_config[tc][color],
+				red->wq_log2,
+				red->min_th,
+				red->max_th,
+				red->maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_RED;
+	return 0;
+}
+
+/* Configure PIE for every traffic class of the subport. On any invalid
+ * parameter the port memory is released (consistent with
+ * rte_sched_red_config) and -EINVAL is returned, since the caller does
+ * not free on CMAN configuration failure.
+ */
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		/* Tail-drop threshold must not exceed the queue size */
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			rte_sched_free_memory(port, n_subports);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+/* Dispatch to the configured congestion management scheme. */
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	switch (params->cman_params->cman_mode) {
+	case RTE_SCHED_CMAN_RED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_CMAN_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,30 +1794,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1821,93 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+	uint32_t qindex, uint32_t pkt_len, uint64_t time)
+{
+	struct rte_sched_queue_extra *qe;
+	struct rte_pie *pie;
+
+	if (subport->cman != RTE_SCHED_CMAN_PIE)
+		return;
+
+	qe = subport->queue_extra + qindex;
+	pie = &qe->pie;
+
+	/* Update queue length */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
+}
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1916,17 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
+	uint32_t qindex __rte_unused,
+	uint32_t pkt_len __rte_unused,
+	uint64_t time __rte_unused)
+{
+	/* no-op when RTE_SCHED_CMAN is not defined */
+}
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2022,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2495,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2515,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index c1a772b70c..72728e1751 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -139,6 +162,24 @@ struct rte_sched_pipe_params {
 	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 };
 
+#ifdef RTE_SCHED_CMAN
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+#endif
+
 /*
  * Subport configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +215,9 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion Management parameters */
+	struct rte_sched_cman_params *cman_params;
 #endif
 };
 
@@ -208,10 +249,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +261,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index 53c337b143..ffa5f7b10d 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,8 @@ EXPERIMENTAL {
 	rte_sched_subport_pipe_profile_add;
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v18 2/5] example/qos_sched: add PIE support
  2021-10-25 11:32                                   ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-25 11:32                                     ` Liguzinski, WojciechX
  2021-10-26 21:08                                       ` Singh, Jasvinder
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                                       ` (4 subsequent siblings)
  6 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-25 11:32 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling either PIE or RED congestion
management, selected by parsing the config file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 config/rte_config.h            |   1 -
 examples/qos_sched/cfg_file.c  | 127 +++++++++++++++------
 examples/qos_sched/cfg_file.h  |   5 +
 examples/qos_sched/init.c      |  27 +++--
 examples/qos_sched/main.h      |   3 +
 examples/qos_sched/profile.cfg | 196 ++++++++++++++++++++++-----------
 6 files changed, 250 insertions(+), 109 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index e0ead8b251..740f42c7e9 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -90,7 +90,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..450482f07d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+/* Copy per-traffic-class congestion management settings from cman_p
+ * into the subport parameter structure.
+ *
+ * NOTE(review): subport_p->cman_params is dereferenced without a NULL
+ * check — this assumes the caller has already pointed it at allocated
+ * storage (e.g. the static cman_params in init.c). Confirm this holds
+ * for every subport this is called on.
+ */
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_RED) {
+			/* RED: copy thresholds and weights per packet color */
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			/* PIE: copy latency-based parameters per traffic class */
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_RED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_RED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
-			/* Parse WRED min thresholds */
-			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			/* Parse RED min thresholds */
+			snprintf(str, sizeof(str), "tc %d red min", i);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -268,14 +303,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED max thresholds */
-			snprintf(str, sizeof(str), "tc %d wred max", i);
+			/* Parse RED max thresholds */
+			snprintf(str, sizeof(str), "tc %d red max", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -283,14 +318,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED inverse mark probabilities */
-			snprintf(str, sizeof(str), "tc %d wred inv prob", i);
+			/* Parse RED inverse mark probabilities */
+			snprintf(str, sizeof(str), "tc %d red inv prob", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -299,14 +334,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED EWMA filter weights */
-			snprintf(str, sizeof(str), "tc %d wred weight", i);
+			/* Parse RED EWMA filter weights */
+			snprintf(str, sizeof(str), "tc %d red weight", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 9b34e4a76b..3c1f0bc680 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -203,15 +203,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_RED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -278,7 +272,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v18 3/5] example/ip_pipeline: add PIE support
  2021-10-25 11:32                                   ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-25 11:32                                     ` Liguzinski, WojciechX
  2021-10-26 21:09                                       ` Singh, Jasvinder
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                                       ` (3 subsequent siblings)
  6 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-25 11:32 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v18 4/5] doc/guides/prog_guide: added PIE
  2021-10-25 11:32                                   ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                       ` (2 preceding siblings ...)
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-25 11:32                                     ` Liguzinski, WojciechX
  2021-10-26 21:09                                       ` Singh, Jasvinder
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 5/5] app/test: add tests for PIE Liguzinski, WojciechX
                                                       ` (2 subsequent siblings)
  6 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-25 11:32 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 64 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 +++-
 3 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c37b78804 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
--------
+Droppers
+--------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on latency samples.
+It considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: packets            |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+The format of these parameters is as specified to the dropper module API.
+They could be made self-calculated for fine tuning within the apps.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
@@ -1396,7 +1440,7 @@ As can be seen, the floating-point implementation achieved the worst performance
    | Method                                                                             | Relative Performance |
    |                                                                                    |                      |
    +====================================================================================+======================+
-   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Dropper>`)                   | 100%                 |
+   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Droppers>`)                  | 100%                 |
    |                                                                                    |                      |
    +------------------------------------------------------------------------------------+----------------------+
    | Fixed-point method with small (512B) look-up table                                 | 148%                 |
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED) and Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and target latency and on whether the queuing latency is currently
+trending up or down. Queuing latency can be obtained by direct measurement or
+estimated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v18 5/5] app/test: add tests for PIE
  2021-10-25 11:32                                   ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                       ` (3 preceding siblings ...)
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-25 11:32                                     ` Liguzinski, WojciechX
  2021-10-26 21:11                                       ` Singh, Jasvinder
  2021-10-26  8:24                                     ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liu, Yu Y
  2021-10-28 10:17                                     ` [dpdk-dev] [PATCH v19 " Liguzinski, WojciechX
  6 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-25 11:32 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
---
 app/test/meson.build |    4 +
 app/test/test_pie.c  | 1065 ++++++++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c  |    6 +-
 lib/sched/rte_pie.h  |   17 +-
 4 files changed, 1085 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/meson.build b/app/test/meson.build
index 20f36a1803..2ac716629b 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -115,6 +115,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -249,6 +250,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -300,6 +302,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -313,6 +316,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Average dequeue time */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+/* single PIE configuration shared by functional tests 1 and 3 */
+static struct rte_pie_config ft_wpconfig[1];
+/* PIE run-time data for the one queue under test */
+static struct rte_pie ft_rtdata[1];
+/* queue length and drop/enqueue counters, reset by test_rte_pie_init() */
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+/* PIE tuning inputs forwarded to rte_pie_config_init() */
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+/* target queue levels stepped through by func_test1() */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+/**
+ * Functional test 1: step the queue through each target level in
+ * ft_tlevels with the single PIE configuration and verify that both
+ * the reported drop probability and the measured drop rate stay at 0
+ * (drop_tolerance is 0 for this test).
+ *
+ * Only one PIE configuration exists (ft_tconfig.num_cfg == 1), so
+ * pconfig[0] is used throughout; indexing with the level-loop counter
+ * would read past the end of the one-element pconfig array.
+ */
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* ramp the queue up to the first target level */
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;	/* placeholder column, never computed here */
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;	/* placeholder column, never computed here */
+
+		/* was &tcfg->tconfig->pconfig[i]: out of bounds for i > 0 */
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		/* with drop_tolerance == 0, any drop at all is a failure */
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+/* single starting target queue level shared by all ten configurations */
+static uint32_t ft2_tlevel[] = {127};
+/* one (max_burst, dp_update_interval) pair per PIE configuration */
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+/* reuses ft_tqueue/ft_tvar from functional test 1 */
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+/**
+ * Functional test 2: run the same scenario once per PIE configuration
+ * (each with a different max_burst / dp_update_interval pair) and check
+ * that the measured drop rate matches the reported drop probability
+ * within the configured tolerance.
+ */
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	/* one full init/ramp/measure cycle per PIE configuration */
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/* re-initialize all configurations and reset counters */
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		/* ramp the queue up to the (single) target level */
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		/* check_drop_rate() also reports the difference via diff */
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+/* non-zero starting queue length — the distinguishing input of test 3 */
+static uint32_t ft3_qlen[] = {100};
+
+/* same tuning values as ft_tconfig, reusing ft_wpconfig storage */
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	/* message corrected: this is functional test 3, not test 2 */
+	.msg = "functional test 3 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+/**
+ * Functional test 3: same scenario as functional test 1 but starting
+ * from a non-zero queue length (ft3_qlen); verifies drop probability
+ * and drop rate remain 0.
+ *
+ * Fixes relative to the test-1 copy it was derived from:
+ * - the loop bound now uses RTE_DIM(ft3_tlevels) (this test's level
+ *   array), not RTE_DIM(ft_tlevels) which belongs to test 1;
+ * - pconfig[0] is used instead of pconfig[i]: ft3_tconfig.num_cfg == 1,
+ *   so indexing with the loop counter would read out of bounds.
+ */
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* ramp the queue up to the first target level */
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft3_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;	/* placeholder column, never computed here */
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;	/* placeholder column, never computed here */
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		/* with drop_tolerance == 0, any drop at all is a failure */
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+/* single PIE configuration shared by both performance tests */
+static struct rte_pie_config pt_wrconfig[1];
+/* run-time data for the enqueue side (pdata_in) */
+static struct rte_pie pt_rtdata[1];
+/* run-time data for the dequeue side (pdata_out, perf test 2 only) */
+static struct rte_pie pt_wtdata[1];
+/* queue length and statistics counters */
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+/* PIE tuning inputs forwarded to rte_pie_config_init() */
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/* NOTE(review): pt_tconfig2/pt_tqueue2 duplicate pt_tconfig/pt_tqueue
+ * except for pdata_out — kept separate so the two perf tests stay
+ * independent of each other.
+ */
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * Performance helper: enqueue (and, when @pie_out is non-NULL, dequeue)
+ * @num_ops packets, timing each rte_pie_enqueue()/rte_pie_dequeue()
+ * call with @prof. Mirrors the usage pattern of
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ * Pass @pie_out == NULL to measure enqueue only; @dequeued is then
+ * never written.
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		/* timestamp is taken outside the profiled region */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		/* optional dequeue leg, profiled with the same accumulator */
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+/* no .dequeued member: perf test 1 passes a NULL pie_out, so the
+ * dequeued counter is never written by enqueue_dequeue_perf()
+ */
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ * Runs num_ops enqueues through enqueue_dequeue_perf() (dequeue side
+ * disabled via NULL) and prints the enqueue/drop split plus the rdtsc
+ * cycle profile.
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* third argument NULL: measure the enqueue path only */
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof)
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	/* num_ops is 30000 here, so total is non-zero for the division */
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+/* unlike perf1_tvar, this also wires up the dequeued counter because
+ * perf test 2 exercises the dequeue path as well
+ */
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ * NOTE(review): the profiler is labelled "enqueue" but accumulates both
+ * the enqueue and dequeue timings — confirm whether a separate dequeue
+ * profile was intended.
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* pdata_out enables the dequeue leg of the measurement */
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	/* total counts enqueue outcomes only (enqueued + dropped) */
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+/* performance tests, run by pie_perf and pie_all */
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * Execute @test_count entries of the @test_type table, incrementing
+ * the caller's total and pass counters for the final summary.
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	uint32_t idx;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (idx = 0; idx < test_count; idx++) {
+		enum test_result result;
+
+		printf("\n%s\n", bar_str);
+		result = test_type[idx].testfn(test_type[idx].testcfg);
+		(*num_tests)++;
+		if (result == PASS)
+			(*num_pass)++;
+		/* closing bar reflects the individual test's outcome */
+		printf("%s\n", result == PASS ? bar_pass_str : bar_fail_str);
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ *
+ * @return 0 when every call rejected its invalid input, -1 otherwise.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL run-time data */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+/**
+ * Print a one-line run summary; the fail column is only shown when at
+ * least one test did not pass.
+ */
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	const uint32_t num_fail = num_tests - num_pass;
+
+	if (num_fail == 0)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_fail);
+}
+
+/**
+ * Map the pass count to a process-style exit code: 0 when every test
+ * passed, 1 otherwise.
+ */
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		return 0;
+	return 1;
+}
+
+/**
+ * Quick PIE autotest entry point: parameter-validation checks first,
+ * then the fast functional suite.
+ */
+static int
+test_pie(void)
+{
+	uint32_t total = 0;
+	uint32_t passed = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &total, &passed);
+	show_stats(total, passed);
+	return tell_the_result(total, passed);
+}
+
+/**
+ * Entry point running only the PIE performance suite.
+ */
+static int
+test_pie_perf(void)
+{
+	uint32_t total = 0;
+	uint32_t passed = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &total, &passed);
+	show_stats(total, passed);
+	return tell_the_result(total, passed);
+}
+
+/**
+ * Entry point running every PIE test: parameter checks, the full
+ * functional suite, then the performance suite.
+ */
+static int
+test_pie_all(void)
+{
+	uint32_t total = 0;
+	uint32_t passed = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &total, &passed);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &total, &passed);
+	show_stats(total, passed);
+	return tell_the_result(total, passed);
+}
+
+/* register the three PIE test commands with the DPDK test framework */
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library
  2021-10-25 11:32                                   ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                       ` (4 preceding siblings ...)
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-26  8:24                                     ` Liu, Yu Y
  2021-10-26  8:33                                       ` Thomas Monjalon
  2021-10-28 10:17                                     ` [dpdk-dev] [PATCH v19 " Liguzinski, WojciechX
  6 siblings, 1 reply; 178+ messages in thread
From: Liu, Yu Y @ 2021-10-26  8:24 UTC (permalink / raw)
  To: Thomas Monjalon, dev, Liguzinski, WojciechX, Singh, Jasvinder,
	Dumitrescu, Cristian
  Cc: Ajmera, Megha, Liu, Yu Y

Hi Thomas,

Would you merge this patch as the series is acked by Cristian as below?
https://patchwork.dpdk.org/project/dpdk/cover/20211019081902.3514841-1-wojciechx.liguzinski@intel.com/ 

Thanks & Regards,
Yu Liu

-----Original Message-----
From: dev <dev-bounces@dpdk.org> On Behalf Of Liguzinski, WojciechX
Sent: Monday, October 25, 2021 7:32 PM
To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
Cc: Ajmera, Megha <megha.ajmera@intel.com>
Subject: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem which is a situation when excess buffers in the network cause high latency and latency variation. Currently, it supports RED for active queue management. However, more advanced queue management is required to address this problem and provide desirable quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral controller Enhanced) that can effectively and directly control queuing latency to address the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and adding a new set of data structures to the library, adding PIE related APIs.
This affects structures in public API/ABI. That is why deprecation notice is going to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  241 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    4 +
 19 files changed, 2172 insertions(+), 279 deletions(-)  create mode 100644 app/test/test_pie.c  create mode 100644 lib/sched/rte_pie.c  create mode 100644 lib/sched/rte_pie.h

--
2.25.1

Series-acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library
  2021-10-26  8:24                                     ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liu, Yu Y
@ 2021-10-26  8:33                                       ` Thomas Monjalon
  2021-10-26 10:02                                         ` Dumitrescu, Cristian
  0 siblings, 1 reply; 178+ messages in thread
From: Thomas Monjalon @ 2021-10-26  8:33 UTC (permalink / raw)
  To: Liguzinski, WojciechX, Singh, Jasvinder, Dumitrescu, Cristian, Liu, Yu Y
  Cc: dev, Ajmera, Megha, Liu, Yu Y, david.marchand

26/10/2021 10:24, Liu, Yu Y:
> Hi Thomas,
> 
> Would you merge this patch as the series is acked by Cristian as below?
> https://patchwork.dpdk.org/project/dpdk/cover/20211019081902.3514841-1-wojciechx.liguzinski@intel.com/

I didn't see any email from Cristian.
It seems you just added this ack silently at the bottom of the cover letter.

1/ an email from Cristian is far better
2/ when integrating ack, it must be done in patches, not cover letter


> 
> Thanks & Regards,
> Yu Liu
> 
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Liguzinski, WojciechX
> Sent: Monday, October 25, 2021 7:32 PM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library
> 
> DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem which is a situation when excess buffers in the network cause high latency and latency variation. Currently, it supports RED for active queue management. However, more advanced queue management is required to address this problem and provide desirable quality of service to users.
> 
> This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral controller Enhanced) that can effectively and directly control queuing latency to address the bufferbloat problem.
> 
> The implementation of mentioned functionality includes modification of existing and adding a new set of data structures to the library, adding PIE related APIs.
> This affects structures in public API/ABI. That is why deprecation notice is going to be prepared and sent.
> 
> Liguzinski, WojciechX (5):
>   sched: add PIE based congestion management
>   example/qos_sched: add PIE support
>   example/ip_pipeline: add PIE support
>   doc/guides/prog_guide: added PIE
>   app/test: add tests for PIE
> 
>  app/test/meson.build                         |    4 +
>  app/test/test_pie.c                          | 1065 ++++++++++++++++++
>  config/rte_config.h                          |    1 -
>  doc/guides/prog_guide/glossary.rst           |    3 +
>  doc/guides/prog_guide/qos_framework.rst      |   64 +-
>  doc/guides/prog_guide/traffic_management.rst |   13 +-
>  drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
>  examples/ip_pipeline/tmgr.c                  |  142 +--
>  examples/qos_sched/cfg_file.c                |  127 ++-
>  examples/qos_sched/cfg_file.h                |    5 +
>  examples/qos_sched/init.c                    |   27 +-
>  examples/qos_sched/main.h                    |    3 +
>  examples/qos_sched/profile.cfg               |  196 ++--
>  lib/sched/meson.build                        |    3 +-
>  lib/sched/rte_pie.c                          |   86 ++
>  lib/sched/rte_pie.h                          |  398 +++++++
>  lib/sched/rte_sched.c                        |  241 ++--
>  lib/sched/rte_sched.h                        |   63 +-
>  lib/sched/version.map                        |    4 +
>  19 files changed, 2172 insertions(+), 279 deletions(-)  create mode 100644 app/test/test_pie.c  create mode 100644 lib/sched/rte_pie.c  create mode 100644 lib/sched/rte_pie.h
> 
> --
> 2.25.1
> 
> Series-acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> 






^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library
  2021-10-26  8:33                                       ` Thomas Monjalon
@ 2021-10-26 10:02                                         ` Dumitrescu, Cristian
  2021-10-26 10:10                                           ` Thomas Monjalon
  0 siblings, 1 reply; 178+ messages in thread
From: Dumitrescu, Cristian @ 2021-10-26 10:02 UTC (permalink / raw)
  To: Thomas Monjalon, Liguzinski, WojciechX, Singh, Jasvinder, Liu,
	Yu Y, Singh, Jasvinder
  Cc: dev, Ajmera, Megha, Liu, Yu Y, david.marchand



> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Tuesday, October 26, 2021 9:33 AM
> To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; Singh, Jasvinder
> <jasvinder.singh@intel.com>; Dumitrescu, Cristian
> <cristian.dumitrescu@intel.com>; Liu, Yu Y <yu.y.liu@intel.com>
> Cc: dev@dpdk.org; Ajmera, Megha <megha.ajmera@intel.com>; Liu, Yu Y
> <yu.y.liu@intel.com>; david.marchand@redhat.com
> Subject: Re: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library
>
> 26/10/2021 10:24, Liu, Yu Y:
> > Hi Thomas,
> >
> > Would you merge this patch as the series is acked by Cristian as below?
> >
> https://patchwork.dpdk.org/project/dpdk/cover/20211019081902.3514841-
> 1-wojciechx.liguzinski@intel.com/
>
> I didn't see any email from Cristian.
> It seems you just added this ack silently at the bottom of the cover letter.
>
> 1/ an email from Cristian is far better
> 2/ when integrating ack, it must be done in patches, not cover letter
>

Hi Thomas,

I did ack this set in a previous version (V15) by replying with "Series-acked-by" on the cover letter email, which does not show in patchwork. Is there a better way to do this?

It would be good to have Jasvinder's ack as well on this series, as he is looking into some other aspects of the sched library.

Regards,
Cristian
>
> >
> > Thanks & Regards,
> > Yu Liu
> >
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Liguzinski, WojciechX
> > Sent: Monday, October 25, 2021 7:32 PM
> > To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> > Cc: Ajmera, Megha <megha.ajmera@intel.com>
> > Subject: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library
> >
> > DPDK sched library is equipped with mechanism that secures it from the
> bufferbloat problem which is a situation when excess buffers in the network
> cause high latency and latency variation. Currently, it supports RED for active
> queue management. However, more advanced queue management is
> required to address this problem and provide desirable quality of service to
> users.
> >
> > This solution (RFC) proposes usage of new algorithm called "PIE"
> (Proportional Integral controller Enhanced) that can effectively and directly
> control queuing latency to address the bufferbloat problem.
> >
> > The implementation of mentioned functionality includes modification of
> existing and adding a new set of data structures to the library, adding PIE
> related APIs.
> > This affects structures in public API/ABI. That is why deprecation notice is
> going to be prepared and sent.
> >
> > Liguzinski, WojciechX (5):
> >   sched: add PIE based congestion management
> >   example/qos_sched: add PIE support
> >   example/ip_pipeline: add PIE support
> >   doc/guides/prog_guide: added PIE
> >   app/test: add tests for PIE
> >
> >  app/test/meson.build                         |    4 +
> >  app/test/test_pie.c                          | 1065 ++++++++++++++++++
> >  config/rte_config.h                          |    1 -
> >  doc/guides/prog_guide/glossary.rst           |    3 +
> >  doc/guides/prog_guide/qos_framework.rst      |   64 +-
> >  doc/guides/prog_guide/traffic_management.rst |   13 +-
> >  drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
> >  examples/ip_pipeline/tmgr.c                  |  142 +--
> >  examples/qos_sched/cfg_file.c                |  127 ++-
> >  examples/qos_sched/cfg_file.h                |    5 +
> >  examples/qos_sched/init.c                    |   27 +-
> >  examples/qos_sched/main.h                    |    3 +
> >  examples/qos_sched/profile.cfg               |  196 ++--
> >  lib/sched/meson.build                        |    3 +-
> >  lib/sched/rte_pie.c                          |   86 ++
> >  lib/sched/rte_pie.h                          |  398 +++++++
> >  lib/sched/rte_sched.c                        |  241 ++--
> >  lib/sched/rte_sched.h                        |   63 +-
> >  lib/sched/version.map                        |    4 +
> >  19 files changed, 2172 insertions(+), 279 deletions(-)  create mode 100644
> app/test/test_pie.c  create mode 100644 lib/sched/rte_pie.c  create mode
> 100644 lib/sched/rte_pie.h
> >
> > --
> > 2.25.1
> >
> > Series-acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> >
>
>
>
>


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library
  2021-10-26 10:02                                         ` Dumitrescu, Cristian
@ 2021-10-26 10:10                                           ` Thomas Monjalon
  2021-10-26 10:20                                             ` Liguzinski, WojciechX
  0 siblings, 1 reply; 178+ messages in thread
From: Thomas Monjalon @ 2021-10-26 10:10 UTC (permalink / raw)
  To: Liguzinski, WojciechX, Singh, Jasvinder, Liu, Yu Y, Singh,
	Jasvinder, Dumitrescu, Cristian
  Cc: dev, Ajmera, Megha, Liu, Yu Y, david.marchand

26/10/2021 12:02, Dumitrescu, Cristian:
> From: Thomas Monjalon <thomas@monjalon.net>
> > 26/10/2021 10:24, Liu, Yu Y:
> > > Hi Thomas,
> > >
> > > Would you merge this patch as the series is acked by Cristian as below?
> > >
> > https://patchwork.dpdk.org/project/dpdk/cover/20211019081902.3514841-
> > 1-wojciechx.liguzinski@intel.com/
> > 
> > I didn't see any email from Cristian.
> > It seems you just added this ack silently at the bottom of the cover letter.
> > 
> > 1/ an email from Cristian is far better
> > 2/ when integrating ack, it must be done in patches, not cover letter
> > 
> 
> Hi Thomas,
> 
> I did ack this set in a previous version (V15) by replying with "Series-acked-by" on the cover letter email, which does not show in patchwork. Is there a better way to do this?

No you did the right thing (I missed this email on v15).
But v16 did not show your ack.
And v17 added it only in the cover letter instead of reporting it in all patches.


> It would be good to have Jasvinder's ack as well on this series, as he is looking into some other aspects of the sched library.

Yes



^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library
  2021-10-26 10:10                                           ` Thomas Monjalon
@ 2021-10-26 10:20                                             ` Liguzinski, WojciechX
  2021-10-26 10:25                                               ` Thomas Monjalon
  0 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-26 10:20 UTC (permalink / raw)
  To: Thomas Monjalon, Singh, Jasvinder, Liu, Yu Y, Singh, Jasvinder,
	Dumitrescu, Cristian
  Cc: dev, Ajmera, Megha, Liu, Yu Y, david.marchand

Hi,

V16 - My bad, probably I haven't copied it correctly when preparing cover letter
V17 - I understood Cristian's comment as to copy the Series ACK to next versions of patches, and not to "split it" for each one. If that was the correct way I had no knowledge about it.

Wojtek

-----Original Message-----
From: Thomas Monjalon <thomas@monjalon.net> 
Sent: Tuesday, October 26, 2021 12:10 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>; Singh, Jasvinder <jasvinder.singh@intel.com>; Liu, Yu Y <yu.y.liu@intel.com>; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
Cc: dev@dpdk.org; Ajmera, Megha <megha.ajmera@intel.com>; Liu, Yu Y <yu.y.liu@intel.com>; david.marchand@redhat.com
Subject: Re: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library

26/10/2021 12:02, Dumitrescu, Cristian:
> From: Thomas Monjalon <thomas@monjalon.net>
> > 26/10/2021 10:24, Liu, Yu Y:
> > > Hi Thomas,
> > >
> > > Would you merge this patch as the series is acked by Cristian as below?
> > >
> > https://patchwork.dpdk.org/project/dpdk/cover/20211019081902.3514841
> > -
> > 1-wojciechx.liguzinski@intel.com/
> > 
> > I didn't see any email from Cristian.
> > It seems you just added this ack silently at the bottom of the cover letter.
> > 
> > 1/ an email from Cristian is far better 2/ when integrating ack, it 
> > must be done in patches, not cover letter
> > 
> 
> Hi Thomas,
> 
> I did ack this set in a previous version (V15) by replying with "Series-acked-by" on the cover letter email, which does not show in patchwork. Is there a better way to do this?

No you did the right thing (I missed this email on v15).
But v16 did not show your ack.
And v17 added it only in the cover letter instead of reporting it in all patches.


> It would be good to have Jasvinder's ack as well on this series, as he is looking into some other aspects of the sched library.

Yes



^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library
  2021-10-26 10:20                                             ` Liguzinski, WojciechX
@ 2021-10-26 10:25                                               ` Thomas Monjalon
  0 siblings, 0 replies; 178+ messages in thread
From: Thomas Monjalon @ 2021-10-26 10:25 UTC (permalink / raw)
  To: Singh, Jasvinder, Liu, Yu Y, Singh, Jasvinder, Dumitrescu,
	Cristian, Liguzinski, WojciechX
  Cc: dev, Ajmera, Megha, Liu, Yu Y, david.marchand, john.mcnamara

26/10/2021 12:20, Liguzinski, WojciechX:
> Hi,
> 
> V16 - My bad, probably I haven't copied it correctly when preparing cover letter
> V17 - I understood Cristian's comment as to copy the Series ACK to next versions of patches, and not to "split it" for each one. If that was the correct way I had no knowledge about it.

Yes you had to reproduce it in each patch. Otherwise who would do it
to make it appear in patchwork and in the git history when merged?
I understand you did not have that knowledge.
For future, I hope the Intel team will better track features patches
of newcomers so they don't miss something known by others.

John, I know knowledge sharing is not an easy task, we always have to improve :)



> From: Thomas Monjalon <thomas@monjalon.net> 
> 26/10/2021 12:02, Dumitrescu, Cristian:
> > From: Thomas Monjalon <thomas@monjalon.net>
> > > 26/10/2021 10:24, Liu, Yu Y:
> > > > Hi Thomas,
> > > >
> > > > Would you merge this patch as the series is acked by Cristian as below?
> > > >
> > > https://patchwork.dpdk.org/project/dpdk/cover/20211019081902.3514841
> > > -
> > > 1-wojciechx.liguzinski@intel.com/
> > > 
> > > I didn't see any email from Cristian.
> > > It seems you just added this ack silently at the bottom of the cover letter.
> > > 
> > > 1/ an email from Cristian is far better 2/ when integrating ack, it 
> > > must be done in patches, not cover letter
> > > 
> > 
> > Hi Thomas,
> > 
> > I did ack this set in a previous version (V15) by replying with "Series-acked-by" on the cover letter email, which does not show in patchwork. Is there a better way to do this?
> 
> No you did the right thing (I missed this email on v15).
> But v16 did not show your ack.
> And v17 added it only in the cover letter instead of reporting it in all patches.
> 
> 
> > It would be good to have Jasvinder's ack as well on this series, as he is looking into some other aspects of the sched library.
> 
> Yes
> 
> 
> 






^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 1/5] sched: add PIE based congestion management
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-26 21:07                                       ` Singh, Jasvinder
  0 siblings, 0 replies; 178+ messages in thread
From: Singh, Jasvinder @ 2021-10-26 21:07 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Dumitrescu, Cristian; +Cc: Ajmera, Megha



> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 25, 2021 12:32 PM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v18 1/5] sched: add PIE based congestion management
> 
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> --
> Changes in V18:
> - Resolved merge conflict in lib/sched/meson.build after rebasing ontop of
> main
> - Reverted whitespace change in app_thread.c - comment from Stephen
> Hemminger
> 
> Changes in V17:
> - Corrected paragraph link naming in qos_framework.rst to fix CI builds
> 
> Changes in V16:
> - Fixed 'title underline too short' error in qos_framework.rst
> - Applied __rte_unused macro to parameters in
> rte_sched_port_pie_dequeue()
> 
> ---
>  drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
>  lib/sched/meson.build                    |   3 +-
>  lib/sched/rte_pie.c                      |  82 +++++
>  lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
>  lib/sched/rte_sched.c                    | 241 +++++++++-----
>  lib/sched/rte_sched.h                    |  63 +++-
>  lib/sched/version.map                    |   4 +
>  7 files changed, 702 insertions(+), 90 deletions(-)  create mode 100644
> lib/sched/rte_pie.c  create mode 100644 lib/sched/rte_pie.h
> 

Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 2/5] example/qos_sched: add PIE support
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-26 21:08                                       ` Singh, Jasvinder
  0 siblings, 0 replies; 178+ messages in thread
From: Singh, Jasvinder @ 2021-10-26 21:08 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Dumitrescu, Cristian; +Cc: Ajmera, Megha



> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 25, 2021 12:32 PM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v18 2/5] example/qos_sched: add PIE support
> 
> This patch adds support to enable PIE or RED by
> parsing the config file.
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  config/rte_config.h            |   1 -
>  examples/qos_sched/cfg_file.c  | 127 +++++++++++++++------
>  examples/qos_sched/cfg_file.h  |   5 +
>  examples/qos_sched/init.c      |  27 +++--
>  examples/qos_sched/main.h      |   3 +
>  examples/qos_sched/profile.cfg | 196 ++++++++++++++++++++++-----------
>  6 files changed, 250 insertions(+), 109 deletions(-)
> 

Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 3/5] example/ip_pipeline: add PIE support
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-26 21:09                                       ` Singh, Jasvinder
  0 siblings, 0 replies; 178+ messages in thread
From: Singh, Jasvinder @ 2021-10-26 21:09 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Dumitrescu, Cristian; +Cc: Ajmera, Megha



> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 25, 2021 12:32 PM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v18 3/5] example/ip_pipeline: add PIE support
> 
> Adding the PIE support for IP Pipeline
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
>  1 file changed, 74 insertions(+), 68 deletions(-)
> 

Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 4/5] doc/guides/prog_guide: added PIE
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-26 21:09                                       ` Singh, Jasvinder
  0 siblings, 0 replies; 178+ messages in thread
From: Singh, Jasvinder @ 2021-10-26 21:09 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Dumitrescu, Cristian; +Cc: Ajmera, Megha



> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 25, 2021 12:32 PM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v18 4/5] doc/guides/prog_guide: added PIE
> 
> Added PIE related information to documentation.
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  doc/guides/prog_guide/glossary.rst           |  3 +
>  doc/guides/prog_guide/qos_framework.rst      | 64 +++++++++++++++++---
>  doc/guides/prog_guide/traffic_management.rst | 13 +++-
>  3 files changed, 68 insertions(+), 12 deletions(-)
> 

Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v18 5/5] app/test: add tests for PIE
  2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-10-26 21:11                                       ` Singh, Jasvinder
  0 siblings, 0 replies; 178+ messages in thread
From: Singh, Jasvinder @ 2021-10-26 21:11 UTC (permalink / raw)
  To: Liguzinski, WojciechX, dev, Dumitrescu, Cristian; +Cc: Ajmera, Megha



> -----Original Message-----
> From: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Sent: Monday, October 25, 2021 12:32 PM
> To: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>;
> Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: Ajmera, Megha <megha.ajmera@intel.com>
> Subject: [PATCH v18 5/5] app/test: add tests for PIE
> 
> Tests for PIE code added to test application.
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> ---
>  app/test/meson.build |    4 +
>  app/test/test_pie.c  | 1065
> ++++++++++++++++++++++++++++++++++++++++++
>  lib/sched/rte_pie.c  |    6 +-
>  lib/sched/rte_pie.h  |   17 +-
>  4 files changed, 1085 insertions(+), 7 deletions(-)  create mode 100644
> app/test/test_pie.c
> 

Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v19 0/5] Add PIE support for HQoS library
  2021-10-25 11:32                                   ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                       ` (5 preceding siblings ...)
  2021-10-26  8:24                                     ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liu, Yu Y
@ 2021-10-28 10:17                                     ` Liguzinski, WojciechX
  2021-10-28 10:17                                       ` [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                                         ` (5 more replies)
  6 siblings, 6 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-28 10:17 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management. However, more
advanced queue management is required to address this problem and provide desirable
quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and
adding a new set of data structures to the library, adding PIE related APIs.
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Liguzinski, WojciechX (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  241 ++--
 lib/sched/rte_sched.h                        |   63 +-
 lib/sched/version.map                        |    4 +
 19 files changed, 2172 insertions(+), 279 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management
  2021-10-28 10:17                                     ` [dpdk-dev] [PATCH v19 " Liguzinski, WojciechX
@ 2021-10-28 10:17                                       ` Liguzinski, WojciechX
  2021-10-29 13:44                                         ` Thomas Monjalon
  2021-10-29 13:57                                         ` Thomas Monjalon
  2021-10-28 10:17                                       ` [dpdk-dev] [PATCH v19 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                                         ` (4 subsequent siblings)
  5 siblings, 2 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-28 10:17 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on rfc8033

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>

--
Changes in V19:
- ACKs included in patches

Changes in V18:
- Resolved merge conflict in lib/sched/meson.build after rebasing ontop of main
- Reverted whitespace change in app_thread.c - comment from Stephen Hemminger

Changes in V17:
- Corrected paragraph link naming in qos_framework.rst to fix CI builds

Changes in V16:
- Fixed 'title underline too short' error in qos_framework.rst
- Applied __rte_unused macro to parameters in rte_sched_port_pie_dequeue()

---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |   3 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 241 +++++++++-----
 lib/sched/rte_sched.h                    |  63 +++-
 lib/sched/version.map                    |   4 +
 7 files changed, 702 insertions(+), 90 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index 8ced4547aa..df75db51ed 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -7,11 +7,12 @@ if is_windows
     subdir_done()
 endif
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
 headers = files(
         'rte_approx.h',
         'rte_red.h',
         'rte_sched.h',
         'rte_sched_common.h',
+        'rte_pie.h',
 )
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+/**
+ * @brief Initialize PIE run-time data to a clean (inactive) state.
+ *
+ * All flags, counters and probability values are reset to zero, so the
+ * queue starts outside a measurement cycle with no drop probability.
+ *
+ * @param pie [in,out] pointer to PIE run-time data; must be non-NULL
+ */
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocating here would be useless: the new pointer would
+		 * only be stored in the local parameter, so the caller never
+		 * sees it (memory leak), and a failed allocation would be
+		 * dereferenced below (NULL pointer crash). Refuse NULL.
+		 */
+		RTE_LOG(ERR, SCHED, "%s: Invalid addr for pie\n", __func__);
+		return;
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+/**
+ * @brief Initialize PIE configuration, converting millisecond parameters
+ * into CPU cycles using the TSC frequency.
+ *
+ * @param pie_cfg [out] pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in] latency target (milliseconds), must be non-zero
+ * @param dp_update_interval [in] drop probability update interval
+ *	(milliseconds), must be non-zero
+ * @param max_burst [in] max burst allowance (milliseconds), must be non-zero
+ * @param tailq_th [in] tail drop threshold (packet count), must be non-zero
+ *
+ * @return 0 on success, negative value on invalid input
+ */
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	/* All parameters are unsigned, so "<= 0" could only ever mean
+	 * "== 0"; spell the zero checks explicitly.
+	 */
+	if (qdelay_ref == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Convert millisecond inputs to CPU cycles (tsc_hz cycles == 1 s). */
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data, one instance per queue.
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (in cpu cycles; set from max_burst) */
+	uint64_t qdelay_old;           /**< Queue delay at previous update (in cpu cycles) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 always enqueue the packet (queue was empty)
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is a plain integer; the previous assert compared it
+	 * against NULL and therefore never checked anything.  Assert on
+	 * the pointer that is actually dereferenced below.
+	 */
+	RTE_ASSERT(pie != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Periodic drop-probability update (RFC 8033, section 4.2)
+ *
+ * Recomputes pie->drop_prob from the current and previous estimated
+ * queueing delay, decays it when congestion subsides, and consumes the
+ * burst allowance.  Called once per dp_update_interval.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Current queue delay = qlen * (avg dequeue time per
+	 * RTE_DQ_THRESHOLD bytes).  Divide in floating point: the old
+	 * integer division truncated avg_dq_time / 16384 to zero whenever
+	 * the average dequeue time was below the threshold, zeroing the
+	 * whole delay estimate.
+	 */
+	double current_qdelay = pie->qlen *
+		((double)pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	/* Auto-tune the adjustment as drop_prob shrinks (RFC 8033) */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	/* Cap the per-interval increase once drop_prob is already high */
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability to [0, 1] */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Consume burst allowance without wrapping.  The old unsigned
+	 * subtraction underflowed when the allowance dropped below the
+	 * update interval, so the "> 0" test was always true and the
+	 * allowance jumped to a huge value instead of reaching zero.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Scale the random sample to [0, 1] in floating point.  The old
+	 * integer division rte_rand()/RTE_RAND_MAX truncated to 0 for
+	 * (almost) every sample, so any non-zero drop_prob dropped the
+	 * packet unconditionally.
+	 */
+	rand_value = (double)rte_rand() / (double)RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued.
+		 * Packets within the burst allowance are never dropped.
+		 */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE
+	 * and restart the dequeue-rate measurement cycle from scratch.
+	 */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter: the packet is being enqueued */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* Dispatch on current occupancy: an empty queue never drops */
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * Maintains an EWMA (weight RTE_DQ_WEIGHT) of the time taken to dequeue
+ * RTE_DQ_THRESHOLD bytes; this feeds the queue-delay estimate used by
+ * the drop-probability update.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			/* NOTE(review): the EWMA below is computed in double
+			 * and truncated back into the uint64_t avg_dq_time —
+			 * presumably intentional; confirm precision is OK.
+			 */
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..0db5335bb6 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,14 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1088,90 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+/* Initialise one RED profile per (traffic class, packet color) pair for
+ * the given subport.  On any init failure the already allocated subports
+ * are released and -EINVAL is returned.
+ */
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t tc, color;
+
+	for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
+		for (color = 0; color < RTE_COLORS; color++) {
+			struct rte_red_params *red =
+				&params->cman_params->red_params[tc][color];
+
+			/* if min/max are both zero, then RED is disabled */
+			if ((red->min_th | red->max_th) == 0)
+				continue;
+
+			if (rte_red_config_init(&s->red_config[tc][color],
+				red->wq_log2,
+				red->min_th,
+				red->max_th,
+				red->maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_RED;
+	return 0;
+}
+
+/* Initialise one PIE profile per traffic class for the given subport.
+ * Returns 0 on success, -EINVAL on invalid parameters; on failure the
+ * already allocated subports are released, matching rte_sched_red_config.
+ */
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		/* A tail-drop threshold above the queue size can never be
+		 * reached; reject it up front.
+		 */
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			/* Free like every other config error path; the old
+			 * code leaked the subports allocated so far here.
+			 */
+			rte_sched_free_memory(port, n_subports);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+/* Dispatch congestion-management initialisation to the configured
+ * algorithm (RED or PIE); any other mode is rejected with -EINVAL.
+ */
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	switch (params->cman_params->cman_mode) {
+	case RTE_SCHED_CMAN_RED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_CMAN_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+		status = rte_sched_cman_config(port, s, params, n_subports);
+		if (status) {
+			RTE_LOG(NOTICE, SCHED, "%s: CMAN configuration fails\n", __func__);
+			return status;
 		}
 #endif
 
@@ -1718,30 +1794,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1821,93 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+		rte_red_mark_queue_empty(red, port->time);
+	}
+}
+
+/* Notify PIE of a packet departure so it can maintain its queue-length
+ * and dequeue-rate state; no-op when the subport runs RED.
+ */
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+	uint32_t qindex, uint32_t pkt_len, uint64_t time)
+{
+	struct rte_sched_queue_extra *qe;
+	struct rte_pie *pie;
+
+	if (subport->cman != RTE_SCHED_CMAN_PIE)
+		return;
+
+	qe = subport->queue_extra + qindex;
+	pie = &qe->pie;
+
+	/* Packet leaves the queue: shrink the tracked occupancy */
+	pie->qlen -= 1;
+	pie->qlen_bytes -= pkt_len;
+
+	rte_pie_dequeue(pie, pkt_len, time);
+}
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1916,17 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
+	uint32_t qindex __rte_unused,
+	uint32_t pkt_len __rte_unused,
+	uint64_t time __rte_unused) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
 
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2022,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2495,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2515,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index cb851301e9..9381b253b7 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,9 +61,10 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
+#ifdef RTE_SCHED_CMAN
 #include "rte_red.h"
+#include "rte_pie.h"
 #endif
 
 /** Maximum number of queues per pipe.
@@ -110,6 +111,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -139,6 +162,24 @@ struct rte_sched_pipe_params {
 	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 };
 
+#ifdef RTE_SCHED_CMAN
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+#endif
+
 /*
  * Subport configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,9 +215,9 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	/** Congestion Management parameters */
+	struct rte_sched_cman_params *cman_params;
 #endif
 };
 
@@ -208,10 +249,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +261,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index a6e505c8ac..d22c07fc9f 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,8 @@ EXPERIMENTAL {
 
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v19 2/5] example/qos_sched: add PIE support
  2021-10-28 10:17                                     ` [dpdk-dev] [PATCH v19 " Liguzinski, WojciechX
  2021-10-28 10:17                                       ` [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-28 10:17                                       ` Liguzinski, WojciechX
  2021-10-28 10:18                                       ` [dpdk-dev] [PATCH v19 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                                         ` (3 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-28 10:17 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

This patch adds support for enabling either PIE or RED by
parsing the configuration file.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 config/rte_config.h            |   1 -
 examples/qos_sched/cfg_file.c  | 127 +++++++++++++++------
 examples/qos_sched/cfg_file.h  |   5 +
 examples/qos_sched/init.c      |  27 +++--
 examples/qos_sched/main.h      |   3 +
 examples/qos_sched/profile.cfg | 196 ++++++++++++++++++++++-----------
 6 files changed, 250 insertions(+), 109 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 1a66b42fcc..6ec687a555 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..450482f07d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+/* Copy parsed congestion-management parameters into the subport's
+ * cman_params (which the caller must have allocated beforehand).
+ * RED fills one entry per (traffic class, color); PIE one per class.
+ */
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int tc, color;
+
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
+		if (cman_p.cman_mode == RTE_SCHED_CMAN_RED) {
+			/* Whole-struct assignment copies min_th, max_th,
+			 * maxp_inv and wq_log2 in one statement.
+			 */
+			for (color = 0; color < RTE_COLORS; color++)
+				subport_p->cman_params->red_params[tc][color] =
+					cman_p.red_params[tc][color];
+		} else {
+			/* PIE: qdelay_ref, dp_update_interval, max_burst
+			 * and tailq_th copied as one struct.
+			 */
+			subport_p->cman_params->pie_params[tc] =
+				cman_p.pie_params[tc];
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_RED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_RED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
-			/* Parse WRED min thresholds */
-			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			/* Parse RED min thresholds */
+			snprintf(str, sizeof(str), "tc %d red min", i);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -268,14 +303,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED max thresholds */
-			snprintf(str, sizeof(str), "tc %d wred max", i);
+			/* Parse RED max thresholds */
+			snprintf(str, sizeof(str), "tc %d red max", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -283,14 +318,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED inverse mark probabilities */
-			snprintf(str, sizeof(str), "tc %d wred inv prob", i);
+			/* Parse RED inverse mark probabilities */
+			snprintf(str, sizeof(str), "tc %d red inv prob", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -299,14 +334,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED EWMA filter weights */
-			snprintf(str, sizeof(str), "tc %d wred weight", i);
+			/* Parse RED EWMA filter weights */
+			snprintf(str, sizeof(str), "tc %d red weight", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 9b34e4a76b..3c1f0bc680 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -203,15 +203,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_RED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -278,7 +272,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v19 3/5] example/ip_pipeline: add PIE support
  2021-10-28 10:17                                     ` [dpdk-dev] [PATCH v19 " Liguzinski, WojciechX
  2021-10-28 10:17                                       ` [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-28 10:17                                       ` [dpdk-dev] [PATCH v19 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-10-28 10:18                                       ` Liguzinski, WojciechX
  2021-10-28 10:18                                       ` [dpdk-dev] [PATCH v19 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                                         ` (2 subsequent siblings)
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-28 10:18 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Adding the PIE support for IP Pipeline

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v19 4/5] doc/guides/prog_guide: added PIE
  2021-10-28 10:17                                     ` [dpdk-dev] [PATCH v19 " Liguzinski, WojciechX
                                                         ` (2 preceding siblings ...)
  2021-10-28 10:18                                       ` [dpdk-dev] [PATCH v19 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-10-28 10:18                                       ` Liguzinski, WojciechX
  2021-10-28 10:18                                       ` [dpdk-dev] [PATCH v19 5/5] app/test: add tests for PIE Liguzinski, WojciechX
  2021-11-02 23:57                                       ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-28 10:18 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Added PIE related information to documentation.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 64 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 +++-
 3 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c37b78804 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
--------
+Droppers
+--------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in the RFC 8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+The format of these parameters is as specified to the dropper module API.
+They could be made self-calculated, for fine tuning, within the apps.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
@@ -1396,7 +1440,7 @@ As can be seen, the floating-point implementation achieved the worst performance
    | Method                                                                             | Relative Performance |
    |                                                                                    |                      |
    +====================================================================================+======================+
-   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Dropper>`)                   | 100%                 |
+   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Droppers>`)                  | 100%                 |
    |                                                                                    |                      |
    +------------------------------------------------------------------------------------+----------------------+
    | Fixed-point method with small (512B) look-up table                                 | 148%                 |
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and the desired queuing latency, and on whether the queuing
+latency is currently trending up or down. Queuing latency can be obtained by direct
+measurement or estimated from the queue length and the dequeue rate. The random drop
+is triggered by a packet's arrival, before it is enqueued into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v19 5/5] app/test: add tests for PIE
  2021-10-28 10:17                                     ` [dpdk-dev] [PATCH v19 " Liguzinski, WojciechX
                                                         ` (3 preceding siblings ...)
  2021-10-28 10:18                                       ` [dpdk-dev] [PATCH v19 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-10-28 10:18                                       ` Liguzinski, WojciechX
  2021-11-02 23:57                                       ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  5 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-10-28 10:18 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Tests for PIE code added to test application.

Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 app/test/meson.build |    4 +
 app/test/test_pie.c  | 1065 ++++++++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c  |    6 +-
 lib/sched/rte_pie.h  |   17 +-
 4 files changed, 1085 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/meson.build b/app/test/meson.build
index 20f36a1803..2ac716629b 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -115,6 +115,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -249,6 +250,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -300,6 +302,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -313,6 +316,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/* Structures for testing rte_pie performance and function. */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output */
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+/* Outcome of a single test function. */
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/* Test structure to define tests to run: config plus the function under it. */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+/* Cycle-count profiler accumulator for enqueue/dequeue measurements. */
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+/* Reference port speed: 10 Gbit/s expressed in bytes per second. */
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+/* Reciprocal of TSC cycles per byte at port_speed_bytes; set by init_port_ts(). */
+static double inv_cycles_per_byte;
+
+/* Derive the cycle-to-byte conversion factor from the CPU clock frequency. */
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+/* Current time expressed in byte-times of the emulated 10G port (scaled TSC). */
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+/* Reset a profiler accumulator and attach a display name to it. */
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->name = name;
+	p->clk_min = UINT64_MAX;
+	p->clk_max = 0;
+	p->clk_avgc = 0;
+	p->clk_avg = 0.0;
+}
+
+/* Start one profiled measurement (serializing TSC read). */
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+/* End one measurement and fold its elapsed cycles into min/max/avg stats.
+ * NOTE(review): start uses rte_rdtsc_precise() but end uses rte_rdtsc(),
+ * mixing serializing and non-serializing reads — confirm this is intended.
+ */
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+/* Print accumulated profiler statistics; silent when nothing was measured. */
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc == 0)
+		return;
+
+	printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+					",max=%" PRIu64 ", avg=%.1f\n",
+		p->name, p->clk_avgc, p->clk_min, p->clk_max,
+		p->clk_avg / (double)p->clk_avgc);
+}
+
+/* Read the PIE activation flag from the runtime data (pie_cfg unused). */
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	/* Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+/* Set the PIE activation flag in the runtime data (pie_cfg unused). */
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+	/* Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	/* Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+/**
+ * Read the average dequeue time (pie->avg_dq_time) — not the drop
+ * probability; the earlier comment here was a copy-paste leftover.
+ */
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+	/* average dequeue time tracked by the PIE runtime data */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+/* Fraction of all attempted operations that were dropped. */
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	double total = (double)enqueued + (double)dropped;
+
+	return (double)dropped / total;
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ *
+ * Returns 1 when the relative difference (in percent, written to *diff)
+ * is within tolerance, 0 otherwise.
+ *
+ * NOTE(review): casting abs_diff to int truncates, so any absolute
+ * difference below 1.0 is treated as an exact match (diff = 0). Since
+ * drop_rate and drop_prob are probabilities in [0, 1], this check can
+ * essentially never fail — confirm whether an epsilon comparison on the
+ * double value was intended (same pattern exists in test_red.c).
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ *
+ * Initialises every PIE config in the test set (shared qdelay_ref/tailq_th,
+ * per-config update interval and max burst) and resets the queue length and
+ * the enqueue/drop counters. Returns PASS on success, FAIL if any
+ * rte_pie_config_init() call rejects its parameters.
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	/* timestamps are derived from the TSC frequency */
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ *
+ * Returns 0 as soon as one enqueue succeeds, -1 when every attempt
+ * within the budget was dropped.
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t try;
+
+	for (try = 0; try < attempts; try++) {
+		/* enqueue and check whether the packet was accepted */
+		if (rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len,
+					get_port_ts()) == 0)
+			return 0;
+	}
+
+	/* no success within the attempt budget */
+	return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ *
+ * Attempts num_ops enqueues of a fixed-size packet, counting accepted
+ * packets in *enqueued and rejected ones in *dropped. The queue length
+ * *qlen is read but never modified here.
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+/* Single-config PIE setup shared by functional tests 1 and 3. */
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+/**
+ * Functional test 1: ramp the queue through the ft_tlevels set with one
+ * PIE configuration and verify neither drop probability nor drop rate
+ * becomes non-zero.
+ */
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* This test defines exactly one PIE config (ft_wpconfig[1]), so
+	 * always index entry 0; indexing with the level counter would read
+	 * past the end of the one-element pconfig array.
+	 */
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ *
+ * Ten PIE configs that sweep max burst and drop-probability update
+ * interval together (index i pairs ft2_max_burst[i] with
+ * ft2_dp_update_interval[i]).
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+/**
+ * Functional test 2: for each of the ten PIE configurations, ramp the
+ * queue to the single target level and compare the observed drop rate
+ * against the PIE drop probability within the configured tolerance.
+ */
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		/* NOTE(review): rte_pie_get_avg_dq_time() returns double but
+		 * is stored into a uint32_t, truncating the fractional part —
+		 * confirm the loss of precision is acceptable for display.
+		 */
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+/* Functional test 3 data: same single PIE config as test 1 but with a
+ * non-zero starting queue length.
+ */
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	/* was mislabelled "functional test 2" — this is test 3 */
+	.msg = "functional test 3 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+/**
+ * Functional test 3: like test 1 but starting from a non-zero queue
+ * length; iterates over this test's own level set (ft3_tlevels).
+ */
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* Single PIE config for this test: always use entry 0. */
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	/* Loop over ft3_tlevels (3 entries); the previous bound
+	 * RTE_DIM(ft_tlevels) (24 entries) belonged to test 1 and, combined
+	 * with pconfig[i], read past the end of both arrays.
+	 */
+	for (i = 0; i < RTE_DIM(ft3_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+/* P1: enqueue-only measurement */
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/* P2: enqueue + dequeue measurement (adds pdata_out) */
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ *
+ * Performs num_ops timed enqueues (and, when pie_out is non-NULL, a timed
+ * dequeue after each), accumulating cycle counts in *prof. The dequeued
+ * pointer is only dereferenced when pie_out is non-NULL; *qlen is read
+ * but never modified here.
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		/* optional dequeue leg, timed with the same profiler */
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+/* Note: .dequeued is deliberately left unset (NULL) — perf test 1 passes
+ * a NULL pie_out, so the dequeued counter is never dereferenced.
+ */
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ * Runs num_ops timed enqueues (no dequeue leg: pie_out is NULL, so the
+ * NULL dequeued pointer in perf1_tvar is never touched), then prints the
+ * enqueue/drop split and the RDTSC statistics.
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ * Same as perf_test but with a non-NULL pie_out, so every iteration also
+ * performs and times a dequeue; prints the dequeue/drop split afterwards.
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ * (quick subset run by the pie_autotest command)
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ * (full functional set run by the pie_all command)
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+/* performance tests run by the pie_perf and pie_all commands */
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ *
+ * Runs each entry of test_type once, incrementing *num_tests per run and
+ * *num_pass per passing run, printing a pass/fail banner after each.
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	uint32_t idx;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (idx = 0; idx < test_count; idx++) {
+		enum test_result result;
+
+		printf("\n%s\n", bar_str);
+		result = test_type[idx].testfn(test_type[idx].testcfg);
+		(*num_tests)++;
+		if (result != PASS) {
+			printf("%s\n", bar_fail_str);
+			continue;
+		}
+		(*num_pass)++;
+		printf("%s\n", bar_pass_str);
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ *
+ * Returns 0 when every invalid call is rejected, -1 otherwise.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL runtime data */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config
+	 * (all messages use the "line: text" format; the later ones were
+	 * missing the ": " separator)
+	 */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+/* Print the pass/fail tally; the fail count is shown only when non-zero. */
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	uint32_t num_fail = num_tests - num_pass;
+
+	if (num_fail == 0)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_fail);
+}
+
+/* Exit status for the test command: 0 when everything passed, 1 otherwise. */
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		return 0;
+	return 1;
+}
+
+/* Entry point for pie_autotest: invalid-parameter checks plus the quick
+ * functional test set.
+ */
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* Entry point for pie_perf: performance tests only. */
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* Entry point for pie_all: invalid-parameter checks, full functional set
+ * and performance tests.
+ */
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management
  2021-10-28 10:17                                       ` [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-10-29 13:44                                         ` Thomas Monjalon
  2021-11-02 13:15                                           ` Liguzinski, WojciechX
  2021-10-29 13:57                                         ` Thomas Monjalon
  1 sibling, 1 reply; 178+ messages in thread
From: Thomas Monjalon @ 2021-10-29 13:44 UTC (permalink / raw)
  To: Liguzinski, WojciechX
  Cc: dev, jasvinder.singh, cristian.dumitrescu, megha.ajmera,
	john.mcnamara, ferruh.yigit, bruce.richardson

28/10/2021 12:17, Liguzinski, WojciechX:
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
> 
> --
> Changes in V19:
> - ACKs included in patches

Would be good to fix your own name as well.
I guess your first name is not WojciechX but Wojciech.
And we put first name first without a comma.

Also the related doc change should be in this patch.

It's OK that you don't have all knowledge of the process,
I am just disappointed that you don't get more help
from those adding your acks, so it falls on me to fix.



^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management
  2021-10-28 10:17                                       ` [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-10-29 13:44                                         ` Thomas Monjalon
@ 2021-10-29 13:57                                         ` Thomas Monjalon
  2021-10-29 14:06                                           ` Dumitrescu, Cristian
  1 sibling, 1 reply; 178+ messages in thread
From: Thomas Monjalon @ 2021-10-29 13:57 UTC (permalink / raw)
  To: cristian.dumitrescu
  Cc: dev, jasvinder.singh, megha.ajmera, Liguzinski, WojciechX,
	david.marchand, john.mcnamara

28/10/2021 12:17, Liguzinski, WojciechX:
> Implement PIE based congestion management based on rfc8033

An explanation is missing about why RED code is removed.
I don't know whether it deserves a separate commit,
but at least it should be removed consistently in lib and example code
in the same patch.

Cristian, please make efforts to help this patchset be properly organized.
A blind ack is not enough.



^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management
  2021-10-29 13:57                                         ` Thomas Monjalon
@ 2021-10-29 14:06                                           ` Dumitrescu, Cristian
  2021-10-29 14:15                                             ` Thomas Monjalon
  0 siblings, 1 reply; 178+ messages in thread
From: Dumitrescu, Cristian @ 2021-10-29 14:06 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Singh, Jasvinder, Ajmera, Megha, Liguzinski, WojciechX,
	david.marchand, Mcnamara, John



> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Friday, October 29, 2021 2:58 PM
> To: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>
> Cc: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Ajmera,
> Megha <megha.ajmera@intel.com>; Liguzinski, WojciechX
> <wojciechx.liguzinski@intel.com>; david.marchand@redhat.com; Mcnamara,
> John <john.mcnamara@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion
> management
> 
> 28/10/2021 12:17, Liguzinski, WojciechX:
> > Implement PIE based congestion management based on rfc8033
> 
> An explanation is missing about why RED code is removed.
> I don't know whether it deserves a separate commit,
> but at least it should be removed consistently in lib and example code
> in the same patch.
> 
> Cristian, please make efforts to help this patchset be properly organized.
> A blind ack is not enough.
> 

The RED code is not removed. It is simply the macro renamed to CMAN (Congestion Management) to include both RED and PIE.

^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management
  2021-10-29 14:06                                           ` Dumitrescu, Cristian
@ 2021-10-29 14:15                                             ` Thomas Monjalon
  0 siblings, 0 replies; 178+ messages in thread
From: Thomas Monjalon @ 2021-10-29 14:15 UTC (permalink / raw)
  To: Dumitrescu, Cristian
  Cc: dev, Singh, Jasvinder, Ajmera, Megha, Liguzinski, WojciechX,
	david.marchand, Mcnamara, John, bruce.richardson

29/10/2021 16:06, Dumitrescu, Cristian:
> From: Thomas Monjalon <thomas@monjalon.net>
> > 28/10/2021 12:17, Liguzinski, WojciechX:
> > > Implement PIE based congestion management based on rfc8033
> > 
> > An explanation is missing about why RED code is removed.
> > I don't know whether it deserves a separate commit,
> > but at least it should be removed consistently in lib and example code
> > in the same patch.
> > 
> > Cristian, please make efforts to help this patchset be properly organized.
> > A blind ack is not enough.
> > 
> 
> The RED code is not removed. It is simply the macro renamed to CMAN (Congestion Management) to include both RED and PIE.

OK, and where this macro is defined?
Why is there such #ifdef?
Even the API is conditionally compiled!



^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management
  2021-10-29 13:44                                         ` Thomas Monjalon
@ 2021-11-02 13:15                                           ` Liguzinski, WojciechX
  0 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-02 13:15 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Singh, Jasvinder, Dumitrescu, Cristian, Ajmera, Megha,
	Mcnamara, John, Yigit, Ferruh, Richardson, Bruce

Sure, I will correct the name.
As I can see from a distance that some things could be resolved/corrected from the very beginning (like e.g. the name and its format).


-----Original Message-----
From: Thomas Monjalon <thomas@monjalon.net> 
Sent: Friday, October 29, 2021 3:44 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
Cc: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Ajmera, Megha <megha.ajmera@intel.com>; Mcnamara, John <john.mcnamara@intel.com>; Yigit, Ferruh <ferruh.yigit@intel.com>; Richardson, Bruce <bruce.richardson@intel.com>
Subject: Re: [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management

28/10/2021 12:17, Liguzinski, WojciechX:
> Implement PIE based congestion management based on rfc8033
> 
> Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
> Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
> 
> --
> Changes in V19:
> - ACKs included in patches

Would be good to fix your own name as well.
I guess your first name is not WojciechX but Wojciech.
And we put first name first without a comma.

Also the related doc change should be in this patch.

It's OK that you don't have all knowledge of the process, I am just disappointed that you don't get more help from those adding your acks, so it falls on me to fix.



^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library
  2021-10-28 10:17                                     ` [dpdk-dev] [PATCH v19 " Liguzinski, WojciechX
                                                         ` (4 preceding siblings ...)
  2021-10-28 10:18                                       ` [dpdk-dev] [PATCH v19 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-11-02 23:57                                       ` Liguzinski, WojciechX
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
                                                           ` (6 more replies)
  5 siblings, 7 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-02 23:57 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management. However, more
advanced queue management is required to address this problem and provide desirable
quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and
adding a new set of data structures to the library, adding PIE related APIs.
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Wojciech Liguzinski (5):
  sched: add PIE based congestion management
  example/qos_sched: add PIE support
  example/ip_pipeline: add PIE support
  doc/guides/prog_guide: added PIE
  app/test: add tests for PIE

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  259 +++--
 lib/sched/rte_sched.h                        |   64 +-
 lib/sched/version.map                        |    4 +
 19 files changed, 2189 insertions(+), 281 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v20 1/5] sched: add PIE based congestion management
  2021-11-02 23:57                                       ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-11-02 23:57                                         ` Liguzinski, WojciechX
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
                                                           ` (5 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-02 23:57 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Implement PIE based congestion management based on rfc8033

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>

--
Changes in V20:
- Removed API conditional compilation
- Added flag to indicate cman enabled/disabled
- Fixed submitter data in patches

Changes in V19:
- ACKs included in patches

Changes in V18:
- Resolved merge conflict in lib/sched/meson.build after rebasing ontop of main
- Reverted whitespace change in app_thread.c - comment from Stephen Hemminger

Changes in V17:
- Corrected paragraph link naming in qos_framework.rst to fix CI builds

Changes in V16:
- Fixed 'title underline too short' error in qos_framework.rst
- Applied __rte_unused macro to parameters in rte_sched_port_pie_dequeue()

---
 drivers/net/softnic/rte_eth_softnic_tm.c |   6 +-
 lib/sched/meson.build                    |   3 +-
 lib/sched/rte_pie.c                      |  82 +++++
 lib/sched/rte_pie.h                      | 393 +++++++++++++++++++++++
 lib/sched/rte_sched.c                    | 259 ++++++++++-----
 lib/sched/rte_sched.h                    |  64 +++-
 lib/sched/version.map                    |   4 +
 7 files changed, 719 insertions(+), 92 deletions(-)
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index 8ced4547aa..df75db51ed 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -7,11 +7,12 @@ if is_windows
     subdir_done()
 endif
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
 headers = files(
         'rte_approx.h',
         'rte_red.h',
         'rte_sched.h',
         'rte_sched_common.h',
+        'rte_pie.h',
 )
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * RED run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
+	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..45bba5bcb8 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,15 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	bool cman_enabled;
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1089,90 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_RED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->cman_params->cman_mode == RTE_SCHED_CMAN_RED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,30 +1262,20 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
+#ifdef RTE_SCHED_CMAN
+		if (params->cman_params != NULL)
+		{
+			s->cman_enabled = true;
+			status = rte_sched_cman_config(port, s, params, n_subports);
+			if (status) {
+				RTE_LOG(NOTICE, SCHED,
+					"%s: CMAN configuration fails\n", __func__);
+				return status;
 			}
 		}
+		else {
+			s->cman_enabled = false;
+		}
 #endif
 
 		/* Scheduling loop detection */
@@ -1718,30 +1803,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1830,102 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	if (subport->cman_enabled)
+	{
+		qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
+	}
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
+	if (!subport->cman_enabled)
+		return 0;
+
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
-	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman_enabled)
+	{
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		if (subport->cman == RTE_SCHED_CMAN_RED) {
+			struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+			rte_red_mark_queue_empty(red, port->time);
+		}
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman_enabled && subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1934,17 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
+	uint32_t qindex __rte_unused,
+	uint32_t pkt_len __rte_unused,
+	uint64_t time __rte_unused) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
+
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2040,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2513,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2533,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index cb851301e9..9727701fe3 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,10 +61,9 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
 #include "rte_red.h"
-#endif
+#include "rte_pie.h"
 
 /** Maximum number of queues per pipe.
  * Note that the multiple queues (power of 2) can only be assigned to
@@ -110,6 +109,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -139,6 +160,22 @@ struct rte_sched_pipe_params {
 	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 };
 
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+
 /*
  * Subport configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,10 +211,11 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
-#endif
+	/** Congestion Management parameters
+	 * If NULL the congestion management is disabled for the subport,
+	 * otherwise proper parameters need to be provided.
+	 */
+	struct rte_sched_cman_params *cman_params;
 };
 
 struct rte_sched_subport_profile_params {
@@ -208,10 +246,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +258,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index a6e505c8ac..d22c07fc9f 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,8 @@ EXPERIMENTAL {
 
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v20 2/5] example/qos_sched: add PIE support
  2021-11-02 23:57                                       ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-11-02 23:57                                         ` Liguzinski, WojciechX
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 3/5] example/ip_pipeline: " Liguzinski, WojciechX
                                                           ` (4 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-02 23:57 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

This patch adds support for enabling either PIE or RED as the
congestion management scheme by parsing the configuration file.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 config/rte_config.h            |   1 -
 examples/qos_sched/cfg_file.c  | 127 +++++++++++++++------
 examples/qos_sched/cfg_file.h  |   5 +
 examples/qos_sched/init.c      |  27 +++--
 examples/qos_sched/main.h      |   3 +
 examples/qos_sched/profile.cfg | 196 ++++++++++++++++++++++-----------
 6 files changed, 250 insertions(+), 109 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 1a66b42fcc..6ec687a555 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..450482f07d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_RED) {
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_RED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_RED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
-			/* Parse WRED min thresholds */
-			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			/* Parse RED min thresholds */
+			snprintf(str, sizeof(str), "tc %d red min", i);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -268,14 +303,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED max thresholds */
-			snprintf(str, sizeof(str), "tc %d wred max", i);
+			/* Parse RED max thresholds */
+			snprintf(str, sizeof(str), "tc %d red max", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -283,14 +318,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED inverse mark probabilities */
-			snprintf(str, sizeof(str), "tc %d wred inv prob", i);
+			/* Parse RED inverse mark probabilities */
+			snprintf(str, sizeof(str), "tc %d red inv prob", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -299,14 +334,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED EWMA filter weights */
-			snprintf(str, sizeof(str), "tc %d wred weight", i);
+			/* Parse RED EWMA filter weights */
+			snprintf(str, sizeof(str), "tc %d red weight", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 9b34e4a76b..3c1f0bc680 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -203,15 +203,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_RED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -278,7 +272,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v20 3/5] example/ip_pipeline: add PIE support
  2021-11-02 23:57                                       ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-11-02 23:57                                         ` Liguzinski, WojciechX
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
                                                           ` (3 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-02 23:57 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Adding the PIE support for IP Pipeline

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v20 4/5] doc/guides/prog_guide: added PIE
  2021-11-02 23:57                                       ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                           ` (2 preceding siblings ...)
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 3/5] example/ip_pipeline: " Liguzinski, WojciechX
@ 2021-11-02 23:57                                         ` Liguzinski, WojciechX
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 5/5] app/test: add tests for PIE Liguzinski, WojciechX
                                                           ` (2 subsequent siblings)
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-02 23:57 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Added PIE related information to documentation.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 doc/guides/prog_guide/glossary.rst           |  3 +
 doc/guides/prog_guide/qos_framework.rst      | 64 +++++++++++++++++---
 doc/guides/prog_guide/traffic_management.rst | 13 +++-
 3 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c37b78804 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
--------
+Droppers
+--------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+These parameters are specified to the dropper module API in this format.
+They could be made self-calculated within the apps for fine tuning.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
@@ -1396,7 +1440,7 @@ As can be seen, the floating-point implementation achieved the worst performance
    | Method                                                                             | Relative Performance |
    |                                                                                    |                      |
    +====================================================================================+======================+
-   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Dropper>`)                   | 100%                 |
+   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Droppers>`)                  | 100%                 |
    |                                                                                    |                      |
    +------------------------------------------------------------------------------------+----------------------+
    | Fixed-point method with small (512B) look-up table                                 | 148%                 |
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED) and Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and desired latency and on whether the queuing latency is
+currently trending up or down. Queuing latency can be obtained by direct measurement
+or estimated from the queue length and the dequeue rate. The random drop decision
+is made upon a packet's arrival, before it is enqueued into a queue.
+
 
 Packet Marking
 --------------
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v20 5/5] app/test: add tests for PIE
  2021-11-02 23:57                                       ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                           ` (3 preceding siblings ...)
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
@ 2021-11-02 23:57                                         ` Liguzinski, WojciechX
  2021-11-03 17:52                                         ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Thomas Monjalon
  2021-11-04 10:40                                         ` [dpdk-dev] [PATCH v21 0/3] " Liguzinski, WojciechX
  6 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-02 23:57 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Tests for PIE code added to test application.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 app/test/meson.build |    4 +
 app/test/test_pie.c  | 1065 ++++++++++++++++++++++++++++++++++++++++++
 lib/sched/rte_pie.c  |    6 +-
 lib/sched/rte_pie.h  |   17 +-
 4 files changed, 1085 insertions(+), 7 deletions(-)
 create mode 100644 app/test/test_pie.c

diff --git a/app/test/meson.build b/app/test/meson.build
index 3d9470df23..fe2ceee42a 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -115,6 +115,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -249,6 +250,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -300,6 +302,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -313,6 +316,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current average dequeue period (time) */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+/* one PIE configuration and its run-time data */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+/* shared counters: queue length, dropped and enqueued packets */
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+/* PIE config inputs passed to rte_pie_config_init() */
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+/* single-configuration PIE setup used by the functional tests */
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+/* queue state starting at length 0 with no drop tolerance */
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+/* per-run variables: operation count and result counters */
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+/* target queue levels stepped through by the test
+ * (presumably bytes, as they are passed as pkt_len — TODO confirm) */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+/* ties config, queue, variables and levels together for test F1 */
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+/**
+ * Test F1 body: drive the queue to the first target level, then run
+ * enqueue batches and verify drop probability and drop rate stay 0.
+ */
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* this test uses a single PIE configuration (num_cfg == 1) */
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/*
+		 * index 0, not i: only one PIE config exists; pconfig[i]
+		 * would read past the end of the array for i > 0
+		 */
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+/* one target level, ten (max_burst, dp_update_interval) pairs */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+/* ten PIE configurations, one per parameter pair above */
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+/* test F2 reuses the F1 queue and variables with its own configs */
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+/**
+ * Test F2 body: for each PIE configuration, ramp the queue to the
+ * single target level, run an enqueue batch and check that the
+ * measured drop rate matches the PIE drop probability within the
+ * configured tolerance.
+ */
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/* re-initialise configs and counters for every iteration */
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		/* check_drop_rate() also reports the relative diff */
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+/* test F3 starts from a non-zero queue length */
+static uint32_t ft3_qlen[] = {100};
+
+/* F3 reuses F1's single config storage and update parameters */
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+/* same queue shape as F1 but with the non-zero starting length */
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+/* per-run variables for F3 (shares F1's counter storage) */
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+/* target queue levels for test F3 */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	/* fixed: message previously claimed this was "functional test 2" */
+	.msg = "functional test 3 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+/**
+ * Test F3 body: like F1 but starting from a non-zero queue length;
+ * drop probability and drop rate must stay 0 for every batch.
+ */
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* this test uses a single PIE configuration (num_cfg == 1) */
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	/* fixed: iterate over this test's own levels (ft3_tlevels),
+	 * not test 1's ft_tlevels */
+	for (i = 0; i < RTE_DIM(ft3_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/*
+		 * index 0, not i: only one PIE config exists; pconfig[i]
+		 * would read past the end of the array for i > 0
+		 */
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+/* one PIE configuration plus separate enqueue/dequeue run-time data */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+/* shared counters: queue length, drop/enqueue/dequeue counts */
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+/* PIE config inputs passed to rte_pie_config_init() */
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+/* configuration for performance test P1 (enqueue only) */
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/* configuration for performance test P2 (enqueue + dequeue) */
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+/* P2 queue additionally carries dequeue-side run-time data */
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ *
+ * Runs @num_ops timed enqueues (and, when @pie_out is non-NULL,
+ * matching timed dequeues), accumulating cycle counts in @prof and
+ * packet counts in the enqueued/dropped/dequeued counters.
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	/* guard against missing config/run-time data */
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		/* dequeue side is optional; only measured for test P2 */
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+/* P1 run variables: 30000 timed enqueue operations */
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ * Times rte_pie_enqueue() over num_ops operations with an rdtsc
+ * profiler and prints the enqueue/drop breakdown.
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* enqueue only: pie_out/dequeued path disabled via NULL */
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	/* NOTE(review): total is num_ops (30000) here, so the divisions
+	 * below cannot divide by zero; revisit if num_ops becomes 0 */
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+/* P2 run variables: adds the dequeued counter to P1's set */
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ * Same as perf_test() but also times rte_pie_dequeue() via the
+ * pdata_out run-time data.
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* both enqueue and dequeue paths are timed here */
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ * (quick subset run by the pie_autotest command)
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ * (full functional set, run by pie_all)
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+/* performance tests, run by pie_perf and pie_all */
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ *
+ * Runs each test in @test_type, incrementing *num_tests per test and
+ * *num_pass per passing test, printing a pass/fail banner after each.
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+			printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+/**
+ * print the aggregated test result counters; the fail column is
+ * omitted when every test passed
+ */
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	const uint32_t num_fail = num_tests - num_pass;
+
+	if (num_fail == 0)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests,
+		       num_pass, num_fail);
+}
+
+/* map the pass count to a process exit status: 0 = all passed */
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		return 0;
+	return 1;
+}
+
+/* entry point for pie_autotest: parameter checks + quick functional set */
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* entry point for pie_perf: performance tests only */
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* entry point for pie_all: parameter checks + full functional and
+ * performance test sets */
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
index 2fcecb2db4..934e9aee50 100644
--- a/lib/sched/rte_pie.c
+++ b/lib/sched/rte_pie.c
@@ -13,7 +13,7 @@
 #pragma warning(disable:2259) /* conversion may lose significant bits */
 #endif
 
-void
+int
 rte_pie_rt_data_init(struct rte_pie *pie)
 {
 	if (pie == NULL) {
@@ -22,6 +22,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 
 		if (pie == NULL)
 			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
 	}
 
 	pie->active = 0;
@@ -35,6 +37,8 @@ rte_pie_rt_data_init(struct rte_pie *pie)
 	pie->qdelay_old = 0;
 	pie->drop_prob = 0;
 	pie->accu_prob = 0;
+
+	return 0;
 }
 
 int
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
index f83c95664f..68f1b96192 100644
--- a/lib/sched/rte_pie.h
+++ b/lib/sched/rte_pie.h
@@ -20,6 +20,7 @@ extern "C" {
 
 #include <rte_random.h>
 #include <rte_debug.h>
+#include <rte_cycles.h>
 
 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
 				     * to start measurement cycle (bytes)
@@ -53,7 +54,7 @@ struct rte_pie_config {
 };
 
 /**
- * RED run-time data
+ * PIE run-time data
  */
 struct rte_pie {
 	uint16_t active;               /**< Flag for activating/deactivating pie */
@@ -74,8 +75,12 @@ struct rte_pie {
  * @brief Initialises run-time data
  *
  * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
  */
-void
+int
 __rte_experimental
 rte_pie_rt_data_init(struct rte_pie *pie);
 
@@ -113,7 +118,7 @@ rte_pie_config_init(struct rte_pie_config *pie_cfg,
  * @retval 0 enqueue the packet
  * @retval !0 drop the packet
  */
-static inline int
+static int
 __rte_experimental
 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie,
@@ -145,7 +150,7 @@ rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
  * @param pie [in, out] data pointer to PIE runtime data
  * @param time [in] current time (measured in cpu cycles)
  */
-static inline void
+static void
 __rte_experimental
 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	struct rte_pie *pie, uint64_t time)
@@ -155,7 +160,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	/* Note: can be implemented using integer multiply.
 	 * DQ_THRESHOLD is power of 2 value.
 	 */
-	double current_qdelay = pie->qlen * (pie->avg_dq_time / RTE_DQ_THRESHOLD);
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
 
 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
 		RTE_BETA * (current_qdelay - pie->qdelay_old);
@@ -181,7 +186,7 @@ _calc_drop_probability(const struct rte_pie_config *pie_cfg,
 	double qdelay = qdelay_ref * 0.5;
 
 	/*  Exponentially decay drop prob when congestion goes away  */
-	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
 
 	/* Bound drop probability */
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library
  2021-11-02 23:57                                       ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                           ` (4 preceding siblings ...)
  2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 5/5] app/test: add tests for PIE Liguzinski, WojciechX
@ 2021-11-03 17:52                                         ` Thomas Monjalon
  2021-11-04  8:29                                           ` Liguzinski, WojciechX
  2021-11-04 10:40                                         ` [dpdk-dev] [PATCH v21 0/3] " Liguzinski, WojciechX
  6 siblings, 1 reply; 178+ messages in thread
From: Thomas Monjalon @ 2021-11-03 17:52 UTC (permalink / raw)
  To: Wojciech Liguzinski
  Cc: dev, jasvinder.singh, cristian.dumitrescu, megha.ajmera, john.mcnamara

03/11/2021 00:57, Liguzinski, WojciechX:
> From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
> 
> DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
> which is a situation when excess buffers in the network cause high latency and latency
> variation. Currently, it supports RED for active queue management. However, more
> advanced queue management is required to address this problem and provide desirable
> quality of service to users.
> 
> This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
> controller Enhanced) that can effectively and directly control queuing latency to address
> the bufferbloat problem.
> 
> The implementation of mentioned functionality includes modification of existing and
> adding a new set of data structures to the library, adding PIE related APIs.
> This affects structures in public API/ABI. That is why deprecation notice is going
> to be prepared and sent.
> 
> Wojciech Liguzinski (5):
>   sched: add PIE based congestion management

Did you see the checkpatch issues on this patch?
http://mails.dpdk.org/archives/test-report/2021-November/238253.html

>   example/qos_sched: add PIE support

The strict minimum is to explain why you add PIE and what the acronym means,
inside the commit log.

>   example/ip_pipeline: add PIE support

Title should follow same convention as history.
For examples, it starts with "examples/" as the directory name.

>   doc/guides/prog_guide: added PIE

doc should be squashed with code patches
Is there any doc update related to the examples?
If not, it should be fully squashed with lib changes.

>   app/test: add tests for PIE

If there is nothing special, it can be squashed with the lib patch.




^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library
  2021-11-03 17:52                                         ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Thomas Monjalon
@ 2021-11-04  8:29                                           ` Liguzinski, WojciechX
  0 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04  8:29 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Singh, Jasvinder, Dumitrescu, Cristian, Ajmera, Megha,
	Mcnamara, John

Hi Thomas,

Thanks, I will apply your suggestions asap.

Wojtek

-----Original Message-----
From: Thomas Monjalon <thomas@monjalon.net> 
Sent: Wednesday, November 3, 2021 6:53 PM
To: Liguzinski, WojciechX <wojciechx.liguzinski@intel.com>
Cc: dev@dpdk.org; Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Ajmera, Megha <megha.ajmera@intel.com>; Mcnamara, John <john.mcnamara@intel.com>
Subject: Re: [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library

03/11/2021 00:57, Liguzinski, WojciechX:
> From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
> 
> DPDK sched library is equipped with mechanism that secures it from the 
> bufferbloat problem which is a situation when excess buffers in the 
> network cause high latency and latency variation. Currently, it 
> supports RED for active queue management. However, more advanced queue 
> management is required to address this problem and provide desirable quality of service to users.
> 
> This solution (RFC) proposes usage of new algorithm called "PIE" 
> (Proportional Integral controller Enhanced) that can effectively and 
> directly control queuing latency to address the bufferbloat problem.
> 
> The implementation of mentioned functionality includes modification of 
> existing and adding a new set of data structures to the library, adding PIE related APIs.
> This affects structures in public API/ABI. That is why deprecation 
> notice is going to be prepared and sent.
> 
> Wojciech Liguzinski (5):
>   sched: add PIE based congestion management

Did you see the checkpatch issues on this patch?
http://mails.dpdk.org/archives/test-report/2021-November/238253.html

>   example/qos_sched: add PIE support

The strict minimum is to explain why you add PIE and what the acronym means, inside the commit log.

>   example/ip_pipeline: add PIE support

Title should follow same convention as history.
For examples, it starts with "examples/" as the directory name.

>   doc/guides/prog_guide: added PIE

doc should be squashed with code patches. Is there any doc update related to the examples?
If not, it should be fully squashed with lib changes.

>   app/test: add tests for PIE

If there is nothing special, it can be squashed with the lib patch.




^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v21 0/3] Add PIE support for HQoS library
  2021-11-02 23:57                                       ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
                                                           ` (5 preceding siblings ...)
  2021-11-03 17:52                                         ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Thomas Monjalon
@ 2021-11-04 10:40                                         ` Liguzinski, WojciechX
  2021-11-04 10:40                                           ` [dpdk-dev] [PATCH v21 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
                                                             ` (3 more replies)
  6 siblings, 4 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 10:40 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

DPDK sched library is equipped with mechanism that secures it from the bufferbloat problem
which is a situation when excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management. However, more
advanced queue management is required to address this problem and provide desirable
quality of service to users.

This solution (RFC) proposes usage of new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of mentioned functionality includes modification of existing and
adding a new set of data structures to the library, adding PIE related APIs.
This affects structures in public API/ABI. That is why deprecation notice is going
to be prepared and sent.

Wojciech Liguzinski (3):
  sched: add PIE based congestion management
  examples/qos_sched: add PIE support
  examples/ip_pipeline: add PIE support

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  255 +++--
 lib/sched/rte_sched.h                        |   64 +-
 lib/sched/version.map                        |    4 +
 19 files changed, 2185 insertions(+), 281 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v21 1/3] sched: add PIE based congestion management
  2021-11-04 10:40                                         ` [dpdk-dev] [PATCH v21 0/3] " Liguzinski, WojciechX
@ 2021-11-04 10:40                                           ` Liguzinski, WojciechX
  2021-11-04 10:40                                           ` [dpdk-dev] [PATCH v21 2/3] examples/qos_sched: add PIE support Liguzinski, WojciechX
                                                             ` (2 subsequent siblings)
  3 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 10:40 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu; +Cc: megha.ajmera

Implement PIE based congestion management based on rfc8033.

The Proportional Integral Controller Enhanced (PIE) algorithm works
by proactively dropping packets randomly.
PIE is implemented as more advanced queue management is required to
address the bufferbloat problem and provide desirable quality of
service to users.

Tests for PIE code added to test application.
Added PIE related information to documentation.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>

--
Changes in V21:
- Coding style fixed
- Patches reorganized according to comments

Changes in V20:
- Removed API conditional compilation
- Added flag to indicate cman enabled/disabled
- Fixed submitter data in patches

Changes in V19:
- ACKs included in patches

Changes in V18:
- Resolved merge conflict in lib/sched/meson.build after rebasing on top of main
- Reverted whitespace change in app_thread.c - comment from Stephen Hemminger

Changes in V17:
- Corrected paragraph link naming in qos_framework.rst to fix CI builds

Changes in V16:
- Fixed 'title underline too short' error in qos_framework.rst
- Applied __rte_unused macro to parameters in rte_sched_port_pie_dequeue()

---
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  255 +++--
 lib/sched/rte_sched.h                        |   64 +-
 lib/sched/version.map                        |    4 +
 12 files changed, 1861 insertions(+), 104 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/app/test/meson.build b/app/test/meson.build
index 3d9470df23..fe2ceee42a 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -115,6 +115,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -249,6 +250,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -300,6 +302,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -313,6 +316,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+									  * (milliseconds)
+									  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+/* 10 Gb/s line rate expressed in bytes per second */
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+/* multiplier converting TSC cycles to port byte-time units; set by init_port_ts() */
+static double inv_cycles_per_byte;
+
+/* Pre-compute the cycles -> byte-time conversion factor used by get_port_ts(). */
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+/* Current timestamp expressed in byte-time units at the configured port speed. */
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+/*
+ * Close one profiled interval: accumulate elapsed cycles into min/max/avg.
+ * NOTE(review): the local is named clk_start but actually holds the elapsed
+ * delta; also the start stamp uses rte_rdtsc_precise() while the end uses
+ * plain rte_rdtsc() — presumably intentional to keep the serializing cost
+ * out of the measured interval; confirm.
+ */
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ *
+ * Writes the relative difference (as a percentage of drop_prob) to *diff.
+ * Returns 1 when the difference is within `tolerance` percent, 0 otherwise.
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	/* NOTE(review): the (int) truncation treats any absolute difference
+	 * below 1.0 as an exact match (*diff reported as 0.0) — presumably
+	 * intentional coarse matching; confirm. Also note drop_prob == 0
+	 * with a non-zero difference would divide by zero below.
+	 */
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ *
+ * Initializes every PIE configuration in tcfg->tconfig (one per
+ * dp_update_interval/max_burst entry) and resets the shared single-element
+ * qlen/dropped/enqueued counters.
+ *
+ * @return PASS, or FAIL if any rte_pie_config_init() call fails
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ *
+ * Retries rte_pie_enqueue() up to `attempts` times and returns 0 on the
+ * first successful enqueue (library return code 0), -1 if every attempt
+ * is rejected.
+ *
+ * NOTE(review): *qlen is passed by value to rte_pie_enqueue() and never
+ * incremented here, so the queue level presented to PIE does not grow
+ * across attempts — presumably tracked inside the library; confirm.
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ *
+ * Performs num_ops enqueue attempts of sizeof(uint32_t)-byte packets,
+ * counting successes in *enqueued and rejections in *dropped.
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+/**
+ * Functional test 1: single PIE configuration.
+ *
+ * Ramps the queue up once, then repeatedly enqueues packet batches and
+ * checks that both the measured drop rate and the reported drop
+ * probability remain zero (drop tolerance for this test is zero).
+ *
+ * @param tcfg test configuration (func_test_config1)
+ * @return PASS or FAIL
+ */
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* this test uses exactly one PIE config (num_cfg == 1): index 0 */
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/*
+		 * Fix: the original indexed pconfig[i] while only a single
+		 * config exists (ft_wpconfig[1]); with i running up to
+		 * RTE_DIM(ft_tlevels) this read past the end of the array.
+		 * Always use entry 0.
+		 */
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600,	900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+/**
+ * Functional test 2: run every PIE configuration in tcfg->tconfig
+ * (indexing here is valid: i < num_cfg) and verify the measured drop
+ * rate stays within drop_tolerance of the reported drop probability.
+ */
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		/* NOTE(review): rte_pie_get_avg_dq_time() returns double;
+		 * the assignment implicitly truncates to uint32_t — confirm
+		 * the loss of the fractional part is acceptable here.
+		 */
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	/* Fix: message previously said "functional test 2" — this is test 3 */
+	.msg = "functional test 3 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+/**
+ * Functional test 3: single PIE configuration, starting from a non-zero
+ * queue length (ft3_qlen). Like test 1, verifies that drop probability
+ * and drop rate both remain zero.
+ *
+ * @param tcfg test configuration (func_test_config3)
+ * @return PASS or FAIL
+ */
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* single PIE config for this test (num_cfg == 1): index 0 */
+	if (increase_qsize(&tcfg->tconfig->pconfig[0],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[0],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): bound kept as RTE_DIM(ft_tlevels) (24 batches) to
+	 * preserve existing iteration count, although this test defines its
+	 * own 3-entry ft3_tlevels — looks like copy-paste from test 1;
+	 * confirm intent. tlevel is not read inside the loop.
+	 */
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		/*
+		 * Fix: the original indexed pconfig[i] while only a single
+		 * config exists (ft_wpconfig[1]); i runs up to 24 which read
+		 * past the end of the array. Always use entry 0.
+		 */
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ *
+ * Performs num_ops timed enqueue (and, when pie_out is non-NULL,
+ * dequeue) operations with a fixed packet length of
+ * 1000*sizeof(uint32_t) bytes, accumulating per-call cycle counts
+ * into *prof and outcome counters into *enqueued/*dropped/*dequeued.
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		/* optional dequeue leg (perf test 2); shares the same profiler */
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ *
+ * Runs test_count entries from test_type, printing a pass/fail banner
+ * after each. Increments *num_tests per test and *num_pass only for
+ * tests returning PASS.
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ *
+ * @return 0 on success, -1 on the first API that wrongly accepts
+ *         invalid input
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL runtime data */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_tests - num_pass);
+}
+
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	return (num_pass == num_tests) ? 0 : 1;
+}
+
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c37b78804 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
--------
+Droppers
+--------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It uses not only the current latency sample but also analyzes whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady-state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: packets            |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+These parameters are specified in the format accepted by the dropper module API.
+They could be made self-calculated within the applications for fine tuning.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
@@ -1396,7 +1440,7 @@ As can be seen, the floating-point implementation achieved the worst performance
    | Method                                                                             | Relative Performance |
    |                                                                                    |                      |
    +====================================================================================+======================+
-   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Dropper>`)                   | 100%                 |
+   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Droppers>`)                  | 100%                 |
    |                                                                                    |                      |
    +------------------------------------------------------------------------------------+----------------------+
    | Fixed-point method with small (512B) look-up table                                 | 148%                 |
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and the desired latency, and on whether the queuing latency is
+currently trending up or down. Queuing latency can be obtained by direct measurement
+or estimated from the queue length and the dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index 8ced4547aa..df75db51ed 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -7,11 +7,12 @@ if is_windows
     subdir_done()
 endif
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
 headers = files(
         'rte_approx.h',
         'rte_red.h',
         'rte_sched.h',
         'rte_sched_common.h',
+        'rte_pie.h',
 )
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..934e9aee50
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+int
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	/* The caller owns the run-time data structure. Allocating one here
+	 * and assigning it to the local parameter would be invisible to the
+	 * caller and would leak the memory, so a NULL pointer is simply
+	 * rejected.
+	 */
+	if (pie == NULL) {
+		RTE_LOG(ERR, SCHED, "%s: Invalid addr for pie\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Reset all run-time state: PIE starts inactive, with no
+	 * measurement cycle in progress and zero drop probability.
+	 */
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->qlen_bytes = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+
+	return 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	/* All parameters are unsigned, so zero is the only invalid value;
+	 * "== 0" states that directly instead of the misleading "<= 0".
+	 */
+	if (qdelay_ref == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Convert the millisecond parameters to CPU cycles once at
+	 * configuration time, so the data path never touches the TSC rate.
+	 */
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..68f1b96192
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	/* NOTE(review): assigned from pie_cfg->max_burst, a 64-bit cpu-cycle
+	 * value; uint32_t may truncate it — confirm the intended width.
+	 */
+	uint32_t burst_allowance;      /**< Current burst allowance (in cpu cycles) */
+	uint64_t qdelay_old;           /**< Old queue delay (in cpu cycles) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet (always; no drop on an empty queue)
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is an integer, not a pointer: guard against a zero-length
+	 * packet instead of comparing it with NULL.
+	 */
+	RTE_ASSERT(pkt_len != 0);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Periodic drop-probability update (PI controller step)
+ *
+ * Recomputes pie->drop_prob from the estimated queue delay and its trend,
+ * decays the probability when congestion goes away, and consumes the
+ * burst allowance.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * The shift by 14 divides by RTE_DQ_THRESHOLD (2^14).
+	 */
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
+
+	/* Both differences may be negative, so convert the unsigned
+	 * operands to double before subtracting to avoid wrap-around
+	 * producing a huge positive error term.
+	 */
+	double p = RTE_ALPHA * ((double)current_qdelay - (double)qdelay_ref) +
+		RTE_BETA * ((double)current_qdelay - (double)pie->qdelay_old);
+
+	/* Scale the correction down while drop_prob is tiny so the
+	 * controller ramps up smoothly (auto-tuning, per RFC 8033).
+	 */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	/* Cap the step size once the probability is already high. */
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* decay factor of 2% per update */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Consume the burst allowance, saturating at zero. The plain
+	 * unsigned subtraction would wrap around below zero, making the
+	 * subsequent "> 0" check always true.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Draw a uniform random number in [0, 1] with floating-point
+	 * division; the integer expression rte_rand()/RTE_RAND_MAX is
+	 * almost always 0, which would drop every packet once
+	 * drop_prob > 0.
+	 */
+	double rand_value = (double)rte_rand() / (double)RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued;
+		 * random drops are suppressed while burst allowance remains.
+		 */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE
+	 * and start a fresh measurement cycle with a full burst allowance.
+	 */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* Dispatch on queue occupancy: an empty queue never drops. */
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * NOTE(review): qlen/qlen_bytes are not decremented here; presumably the
+ * caller maintains them on dequeue — verify against the scheduler code.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* End the measurement cycle once RTE_DQ_THRESHOLD bytes
+		 * have departed, folding the elapsed time into the
+		 * EWMA (weight RTE_DQ_WEIGHT) of the dequeue time.
+		 */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..807c6a4807 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,15 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	bool cman_enabled;
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1089,90 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+/* Initialize per-traffic-class, per-color RED configurations of a subport
+ * from the user-supplied congestion-management parameters. Frees the
+ * subport memory and returns -EINVAL on any invalid RED profile.
+ */
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_RED;
+	return 0;
+}
+
+/* Initialize per-traffic-class PIE configurations of a subport from the
+ * user-supplied congestion-management parameters. Frees the subport
+ * memory and returns -EINVAL on any invalid PIE profile, mirroring the
+ * RED configuration error path.
+ */
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		/* The tail drop threshold may not exceed the queue size. */
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			/* Free previously allocated memory, as the RED
+			 * configuration path does on error.
+			 */
+			rte_sched_free_memory(port, n_subports);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+/* Dispatch subport congestion-management setup to the initializer for
+ * the selected algorithm (RED or PIE); unknown modes are rejected.
+ */
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	switch (params->cman_params->cman_mode) {
+	case RTE_SCHED_CMAN_RED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_CMAN_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1262,17 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
+#ifdef RTE_SCHED_CMAN
+		if (params->cman_params != NULL) {
+			s->cman_enabled = true;
+			status = rte_sched_cman_config(port, s, params, n_subports);
+			if (status) {
+				RTE_LOG(NOTICE, SCHED,
+					"%s: CMAN configuration fails\n", __func__);
+				return status;
 			}
+		} else {
+			s->cman_enabled = false;
 		}
 #endif
 
@@ -1718,30 +1801,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1828,100 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	if (subport->cman_enabled) {
+		qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
+	}
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
+	if (!subport->cman_enabled)
+		return 0;
+
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
-	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman_enabled) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		if (subport->cman == RTE_SCHED_CMAN_RED) {
+			struct rte_red *red = &qe->red;
+
+			rte_red_mark_queue_empty(red, port->time);
+		}
+	}
+}
 
-	rte_red_mark_queue_empty(red, port->time);
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman_enabled && subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1930,17 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
+	uint32_t qindex __rte_unused,
+	uint32_t pkt_len __rte_unused,
+	uint64_t time __rte_unused) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
+
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2036,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2509,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2529,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index cb851301e9..9727701fe3 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,10 +61,9 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
 #include "rte_red.h"
-#endif
+#include "rte_pie.h"
 
 /** Maximum number of queues per pipe.
  * Note that the multiple queues (power of 2) can only be assigned to
@@ -110,6 +109,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -139,6 +160,22 @@ struct rte_sched_pipe_params {
 	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 };
 
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+
 /*
  * Subport configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,10 +211,11 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
-#endif
+	/** Congestion Management parameters
+	 * If NULL the congestion management is disabled for the subport,
+	 * otherwise proper parameters need to be provided.
+	 */
+	struct rte_sched_cman_params *cman_params;
 };
 
 struct rte_sched_subport_profile_params {
@@ -208,10 +246,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +258,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index a6e505c8ac..d22c07fc9f 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,8 @@ EXPERIMENTAL {
 
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v21 2/3] examples/qos_sched: add PIE support
  2021-11-04 10:40                                         ` [dpdk-dev] [PATCH v21 0/3] " Liguzinski, WojciechX
  2021-11-04 10:40                                           ` [dpdk-dev] [PATCH v21 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-11-04 10:40                                           ` Liguzinski, WojciechX
  2021-11-04 10:40                                           ` [dpdk-dev] [PATCH v21 3/3] examples/ip_pipeline: " Liguzinski, WojciechX
  2021-11-04 10:49                                           ` [dpdk-dev] [PATCH v22 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  3 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 10:40 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

This patch adds support for enabling either PIE or RED congestion
management by parsing the configuration file.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 config/rte_config.h            |   1 -
 examples/qos_sched/cfg_file.c  | 127 +++++++++++++++------
 examples/qos_sched/cfg_file.h  |   5 +
 examples/qos_sched/init.c      |  27 +++--
 examples/qos_sched/main.h      |   3 +
 examples/qos_sched/profile.cfg | 196 ++++++++++++++++++++++-----------
 6 files changed, 250 insertions(+), 109 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 1a66b42fcc..6ec687a555 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..450482f07d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_RED) {
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_RED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_RED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
-			/* Parse WRED min thresholds */
-			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			/* Parse RED min thresholds */
+			snprintf(str, sizeof(str), "tc %d red min", i);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -268,14 +303,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED max thresholds */
-			snprintf(str, sizeof(str), "tc %d wred max", i);
+			/* Parse RED max thresholds */
+			snprintf(str, sizeof(str), "tc %d red max", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -283,14 +318,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED inverse mark probabilities */
-			snprintf(str, sizeof(str), "tc %d wred inv prob", i);
+			/* Parse RED inverse mark probabilities */
+			snprintf(str, sizeof(str), "tc %d red inv prob", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -299,14 +334,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED EWMA filter weights */
-			snprintf(str, sizeof(str), "tc %d wred weight", i);
+			/* Parse RED EWMA filter weights */
+			snprintf(str, sizeof(str), "tc %d red weight", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 9b34e4a76b..3c1f0bc680 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -203,15 +203,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_RED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -278,7 +272,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v21 3/3] examples/ip_pipeline: add PIE support
  2021-11-04 10:40                                         ` [dpdk-dev] [PATCH v21 0/3] " Liguzinski, WojciechX
  2021-11-04 10:40                                           ` [dpdk-dev] [PATCH v21 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-11-04 10:40                                           ` [dpdk-dev] [PATCH v21 2/3] examples/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-11-04 10:40                                           ` Liguzinski, WojciechX
  2021-11-04 10:49                                           ` [dpdk-dev] [PATCH v22 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  3 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 10:40 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Add PIE congestion management support to the IP Pipeline example.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v22 0/3] Add PIE support for HQoS library
  2021-11-04 10:40                                         ` [dpdk-dev] [PATCH v21 0/3] " Liguzinski, WojciechX
                                                             ` (2 preceding siblings ...)
  2021-11-04 10:40                                           ` [dpdk-dev] [PATCH v21 3/3] examples/ip_pipeline: " Liguzinski, WojciechX
@ 2021-11-04 10:49                                           ` Liguzinski, WojciechX
  2021-11-04 10:49                                             ` [dpdk-dev] [PATCH v22 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
                                                               ` (4 more replies)
  3 siblings, 5 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 10:49 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

The DPDK sched library is equipped with a mechanism that protects it from the bufferbloat problem,
which is a situation in which excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management. However, more
advanced queue management is required to address this problem and provide a desirable
quality of service to users.

This solution (RFC) proposes the usage of a new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of the mentioned functionality includes modifying existing data structures and
adding a new set of data structures to the library, as well as adding PIE-related APIs.
This affects structures in the public API/ABI. That is why a deprecation notice is going
to be prepared and sent.

Wojciech Liguzinski (3):
  sched: add PIE based congestion management
  examples/qos_sched: add PIE support
  examples/ip_pipeline: add PIE support

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  255 +++--
 lib/sched/rte_sched.h                        |   64 +-
 lib/sched/version.map                        |    4 +
 19 files changed, 2185 insertions(+), 281 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v22 1/3] sched: add PIE based congestion management
  2021-11-04 10:49                                           ` [dpdk-dev] [PATCH v22 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-11-04 10:49                                             ` Liguzinski, WojciechX
  2021-11-04 10:49                                             ` [dpdk-dev] [PATCH v22 2/3] examples/qos_sched: add PIE support Liguzinski, WojciechX
                                                               ` (3 subsequent siblings)
  4 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 10:49 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Implement PIE-based congestion management as described in RFC 8033.

The Proportional Integral Controller Enhanced (PIE) algorithm works
by proactively dropping packets randomly.
PIE is implemented because more advanced queue management is required to
address the bufferbloat problem and provide a desirable quality of
service to users.

Tests for PIE code added to test application.
Added PIE related information to documentation.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>

--
Changes in V22:
- Coding style fixed

Changes in V21:
- Coding style fixed
- Patches reorganized according to comments

Changes in V20:
- Removed API conditional compilation
- Added flag to indicate cman enabled/disabled
- Fixed submitter data in patches

Changes in V19:
- ACKs included in patches

Changes in V18:
- Resolved merge conflict in lib/sched/meson.build after rebasing ontop of main
- Reverted whitespace change in app_thread.c - comment from Stephen Hemminger

Changes in V17:
- Corrected paragraph link naming in qos_framework.rst to fix CI builds

Changes in V16:
- Fixed 'title underline too short' error in qos_framework.rst
- Applied __rte_unused macro to parameters in rte_sched_port_pie_dequeue()

---
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  255 +++--
 lib/sched/rte_sched.h                        |   64 +-
 lib/sched/version.map                        |    4 +
 12 files changed, 1861 insertions(+), 104 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/app/test/meson.build b/app/test/meson.build
index 3d9470df23..fe2ceee42a 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -115,6 +115,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -249,6 +250,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -300,6 +302,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -313,6 +316,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test 1: measure enqueue-only performance.
+ *
+ * Runs tvar->num_ops enqueue operations against a single PIE
+ * configuration and reports enqueued/dropped counts and cycle stats.
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/* initialize the rte_pie run-time data structure */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+			     NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+			     tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	/* Guard against division by zero when no operation was performed */
+	if (total > 0)
+		printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+				total, *tcfg->tvar->enqueued,
+				((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+				*tcfg->tvar->dropped,
+				((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+	else
+		printf("\ntotal: 0, enqueued: 0, dropped: 0\n");
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+/* Run-time counters for performance test 2: unlike perf1_tvar, this
+ * one also wires up .dequeued, as the test exercises both directions.
+ */
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,	/* enqueue/dequeue operation pairs per run */
+	.clk_freq = 0,		/* presumably 0 = detect at run time -- TODO confirm */
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+/* Descriptor binding config, queue, counters and levels for perf test 2 */
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test 2: measure combined enqueue & dequeue performance.
+ *
+ * Runs tvar->num_ops enqueue+dequeue pairs against a single PIE
+ * configuration and reports dequeued/dropped counts and cycle stats.
+ * Note: the rdtsc profile accumulates both enqueue and dequeue timings.
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/* initialize the rte_pie run-time data structure */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+			     tcfg->tqueue->pdata_out,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+			     tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	/* Guard against division by zero when no operation was performed */
+	if (total > 0)
+		printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+				total, *tcfg->tvar->dequeued,
+				((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+				*tcfg->tvar->dropped,
+				((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+	else
+		printf("\ntotal: 0, dequeued: 0, dropped: 0\n");
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional tests to be executed
+ * (full set: includes test 3, which the quick set skips)
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+/**
+ * define the performance tests to be executed
+ */
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * Execute the given set of PIE tests, printing a pass/fail banner
+ * after each one.
+ *
+ * @param test_type array of {testcfg, testfn} descriptors to run
+ * @param test_count number of entries in test_type
+ * @param num_tests [in,out] incremented once per executed test
+ * @param num_pass [in,out] incremented once per passing test
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+			printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL runtime data */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+/* Print the aggregated test results; omit the fail column when clean. */
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	const uint32_t num_fail = num_tests - num_pass;
+
+	if (num_fail == 0)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_fail);
+}
+
+/* Map the pass count to a process-style exit code: 0 = all passed. */
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		return 0;
+	return 1;
+}
+
+/* Entry point for "pie_autotest": parameter validation plus the quick
+ * functional test set only.
+ */
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	/* bail out early if the API rejects valid-looking abuse incorrectly */
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* Entry point for "pie_perf": performance tests only, no parameter
+ * validation pass.
+ */
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/* Entry point for "pie_all": parameter validation, the full functional
+ * set, and the performance set.
+ */
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c37b78804 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
--------
+Droppers
+--------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+The format of these parameters is as specified to the dropper module API.
+They can also be computed by the application at run time for fine tuning.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
@@ -1396,7 +1440,7 @@ As can be seen, the floating-point implementation achieved the worst performance
    | Method                                                                             | Relative Performance |
    |                                                                                    |                      |
    +====================================================================================+======================+
-   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Dropper>`)                   | 100%                 |
+   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Droppers>`)                  | 100%                 |
    |                                                                                    |                      |
    +------------------------------------------------------------------------------------+----------------------+
    | Fixed-point method with small (512B) look-up table                                 | 148%                 |
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and the desired latency, and on whether the queuing latency is currently
+trending up or down. Queuing latency can be obtained using direct measurement or
+on estimations calculated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index 8ced4547aa..df75db51ed 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -7,11 +7,12 @@ if is_windows
     subdir_done()
 endif
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
 headers = files(
         'rte_approx.h',
         'rte_red.h',
         'rte_sched.h',
         'rte_sched_common.h',
+        'rte_pie.h',
 )
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..934e9aee50
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+int
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* The caller must supply the storage. Allocating a
+		 * replacement here (as the original code did) only leaked
+		 * the allocation: the pointer never reached the caller.
+		 */
+		RTE_LOG(ERR, SCHED, "%s: Invalid rte_pie pointer\n", __func__);
+		return -1;
+	}
+
+	/* Reset all run-time state, including qlen_bytes, which the
+	 * original init missed even though enqueue/dequeue rely on it.
+	 */
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->qlen_bytes = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+
+	return 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Invalid pie_cfg pointer\n", __func__);
+		return -1;
+	}
+
+	/* The parameters are unsigned, so "<= 0" can only ever mean
+	 * "== 0"; test for zero explicitly (avoids -Wtype-limits noise).
+	 */
+	if (qdelay_ref == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Convert the millisecond inputs to TSC cycles once at config time */
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..68f1b96192
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time the current measurement cycle started (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (in cpu cycles, set from max_burst) */
+	uint64_t qdelay_old;           /**< Previous queue delay estimate (in cpu cycles) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is an integer, not a pointer; the original asserted
+	 * pkt_len != NULL, which is meaningless - assert a sane length.
+	 */
+	RTE_ASSERT(pkt_len > 0);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Periodic update of the packet drop probability
+ *        (PI controller step, RFC 8033 section 4.2)
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
+
+	/* Compute the deltas in floating point: the operands are unsigned,
+	 * so subtracting them directly would wrap to a huge positive value
+	 * instead of going negative when the delay drops below the
+	 * reference/previous value, and the controller could never
+	 * decrease the drop probability.
+	 */
+	double p = RTE_ALPHA * ((double)current_qdelay - (double)qdelay_ref) +
+		RTE_BETA * ((double)current_qdelay - (double)pie->qdelay_old);
+
+	/* Auto-tune the gain while the drop probability is still small */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Decrement the burst allowance, saturating at zero: the original
+	 * unsigned subtraction wrapped around whenever the allowance was
+	 * smaller than the update interval, so the "> 0" check never
+	 * triggered and the burst window effectively never closed.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief make a random drop/enqueue decision based on the current drop
+ *        probability (RFC 8033 section 4.1)
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Scale the random draw to [0, 1] in floating point. The original
+	 * integer division rte_rand()/RTE_RAND_MAX truncated to 0 for
+	 * (almost) every draw, making the comparison below meaningless.
+	 */
+	rand_value = (double)rte_rand() / (double)RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued.
+		 * Note: _rte_pie_drop() is evaluated first, so its
+		 * accu_prob side effects happen even during the burst
+		 * allowance window.
+		 */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE
+	 * and reset the whole measurement state.
+	 */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet (the non-empty path returns 1 or 2
+ *            depending on the drop criterion)
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* The empty-queue path never drops; the non-empty path applies
+	 * tail drop and the probabilistic PIE drop.
+	 */
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * NOTE(review): qlen/qlen_bytes are not decremented here - confirm the
+ * caller updates them on dequeue.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Finish the measurement cycle once enough bytes departed */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			/* EWMA of the cycle duration, weight RTE_DQ_WEIGHT */
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..807c6a4807 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,15 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	bool cman_enabled;
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1089,90 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_RED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->cman_params->cman_mode == RTE_SCHED_CMAN_RED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1262,17 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
+#ifdef RTE_SCHED_CMAN
+		if (params->cman_params != NULL) {
+			s->cman_enabled = true;
+			status = rte_sched_cman_config(port, s, params, n_subports);
+			if (status) {
+				RTE_LOG(NOTICE, SCHED,
+					"%s: CMAN configuration fails\n", __func__);
+				return status;
 			}
+		} else {
+			s->cman_enabled = false;
 		}
 #endif
 
@@ -1718,30 +1801,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1828,100 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	if (subport->cman_enabled) {
+		qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
+	}
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
+	if (!subport->cman_enabled)
+		return 0;
+
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
-	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman_enabled) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		if (subport->cman == RTE_SCHED_CMAN_RED) {
+			struct rte_red *red = &qe->red;
+
+			rte_red_mark_queue_empty(red, port->time);
+		}
+	}
+}
 
-	rte_red_mark_queue_empty(red, port->time);
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman_enabled && subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1930,17 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
+	uint32_t qindex __rte_unused,
+	uint32_t pkt_len __rte_unused,
+	uint64_t time __rte_unused) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
+
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2036,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2509,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2529,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index cb851301e9..9727701fe3 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,10 +61,9 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
 #include "rte_red.h"
-#endif
+#include "rte_pie.h"
 
 /** Maximum number of queues per pipe.
  * Note that the multiple queues (power of 2) can only be assigned to
@@ -110,6 +109,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -139,6 +160,22 @@ struct rte_sched_pipe_params {
 	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 };
 
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+
 /*
  * Subport configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,10 +211,11 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
-#endif
+	/** Congestion Management parameters
+	 * If NULL the congestion management is disabled for the subport,
+	 * otherwise proper parameters need to be provided.
+	 */
+	struct rte_sched_cman_params *cman_params;
 };
 
 struct rte_sched_subport_profile_params {
@@ -208,10 +246,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +258,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index a6e505c8ac..d22c07fc9f 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,8 @@ EXPERIMENTAL {
 
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v22 2/3] examples/qos_sched: add PIE support
  2021-11-04 10:49                                           ` [dpdk-dev] [PATCH v22 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-11-04 10:49                                             ` [dpdk-dev] [PATCH v22 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-11-04 10:49                                             ` Liguzinski, WojciechX
  2021-11-04 10:49                                             ` [dpdk-dev] [PATCH v22 3/3] examples/ip_pipeline: " Liguzinski, WojciechX
                                                               ` (2 subsequent siblings)
  4 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 10:49 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

This patch adds support for enabling either PIE or RED congestion
management by parsing the configuration file.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 config/rte_config.h            |   1 -
 examples/qos_sched/cfg_file.c  | 127 +++++++++++++++------
 examples/qos_sched/cfg_file.h  |   5 +
 examples/qos_sched/init.c      |  27 +++--
 examples/qos_sched/main.h      |   3 +
 examples/qos_sched/profile.cfg | 196 ++++++++++++++++++++++-----------
 6 files changed, 250 insertions(+), 109 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 1a66b42fcc..6ec687a555 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..450482f07d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_RED) {
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_RED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_RED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
-			/* Parse WRED min thresholds */
-			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			/* Parse RED min thresholds */
+			snprintf(str, sizeof(str), "tc %d red min", i);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -268,14 +303,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED max thresholds */
-			snprintf(str, sizeof(str), "tc %d wred max", i);
+			/* Parse RED max thresholds */
+			snprintf(str, sizeof(str), "tc %d red max", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -283,14 +318,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED inverse mark probabilities */
-			snprintf(str, sizeof(str), "tc %d wred inv prob", i);
+			/* Parse RED inverse mark probabilities */
+			snprintf(str, sizeof(str), "tc %d red inv prob", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -299,14 +334,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED EWMA filter weights */
-			snprintf(str, sizeof(str), "tc %d wred weight", i);
+			/* Parse RED EWMA filter weights */
+			snprintf(str, sizeof(str), "tc %d red weight", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 9b34e4a76b..3c1f0bc680 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -203,15 +203,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_RED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -278,7 +272,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v22 3/3] examples/ip_pipeline: add PIE support
  2021-11-04 10:49                                           ` [dpdk-dev] [PATCH v22 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-11-04 10:49                                             ` [dpdk-dev] [PATCH v22 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-11-04 10:49                                             ` [dpdk-dev] [PATCH v22 2/3] examples/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-11-04 10:49                                             ` Liguzinski, WojciechX
  2021-11-04 11:03                                             ` [dpdk-dev] [PATCH v23 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-11-04 14:55                                             ` [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library Thomas Monjalon
  4 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 10:49 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Adding the PIE support for IP Pipeline

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v23 0/3] Add PIE support for HQoS library
  2021-11-04 10:49                                           ` [dpdk-dev] [PATCH v22 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
                                                               ` (2 preceding siblings ...)
  2021-11-04 10:49                                             ` [dpdk-dev] [PATCH v22 3/3] examples/ip_pipeline: " Liguzinski, WojciechX
@ 2021-11-04 11:03                                             ` Liguzinski, WojciechX
  2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
                                                                 ` (2 more replies)
  2021-11-04 14:55                                             ` [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library Thomas Monjalon
  4 siblings, 3 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 11:03 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

The DPDK sched library is equipped with a mechanism that protects it from the bufferbloat
problem, which is a situation in which excess buffers in the network cause high latency and
latency variation. Currently, it supports RED for active queue management. However, more
advanced queue management is required to address this problem and provide the desired
quality of service to users.

This solution (RFC) proposes the usage of a new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of the mentioned functionality includes modifying existing data structures
and adding a new set of data structures to the library, as well as adding PIE-related APIs.
This affects structures in the public API/ABI. That is why a deprecation notice is going
to be prepared and sent.

Wojciech Liguzinski (3):
  sched: add PIE based congestion management
  examples/qos_sched: add PIE support
  examples/ip_pipeline: add PIE support

 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    1 -
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  254 +++--
 lib/sched/rte_sched.h                        |   64 +-
 lib/sched/version.map                        |    4 +
 19 files changed, 2184 insertions(+), 281 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v23 1/3] sched: add PIE based congestion management
  2021-11-04 11:03                                             ` [dpdk-dev] [PATCH v23 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-11-04 11:03                                               ` Liguzinski, WojciechX
  2021-11-04 13:58                                                 ` Thomas Monjalon
  2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 2/3] examples/qos_sched: add PIE support Liguzinski, WojciechX
  2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 3/3] examples/ip_pipeline: " Liguzinski, WojciechX
  2 siblings, 1 reply; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 11:03 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Implement PIE-based congestion management as described in RFC 8033.

The Proportional Integral Controller Enhanced (PIE) algorithm works
by proactively dropping packets randomly.
PIE is implemented as more advanced queue management is required to
address the bufferbloat problem and provide desirable quality of
service to users.

Tests for PIE code added to test application.
Added PIE related information to documentation.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>

--
Changes in V23:
- Coding style fixed

Changes in V22:
- Coding style fixed

Changes in V21:
- Coding style fixed
- Patches reorganized according to comments

Changes in V20:
- Removed API conditional compilation
- Added flag to indicate cman enabled/disabled
- Fixed submitter data in patches

Changes in V19:
- ACKs included in patches

Changes in V18:
- Resolved merge conflict in lib/sched/meson.build after rebasing ontop of main
- Reverted whitespace change in app_thread.c - comment from Stephen Hemminger

Changes in V17:
- Corrected paragraph link naming in qos_framework.rst to fix CI builds

Changes in V16:
- Fixed 'title underline too short' error in qos_framework.rst
- Applied __rte_unused macro to parameters in rte_sched_port_pie_dequeue()

---
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  398 +++++++
 lib/sched/rte_sched.c                        |  254 +++--
 lib/sched/rte_sched.h                        |   64 +-
 lib/sched/version.map                        |    4 +
 12 files changed, 1860 insertions(+), 104 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/app/test/meson.build b/app/test/meson.build
index 0c46cb064e..96670c3504 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -115,6 +115,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -250,6 +251,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -301,6 +303,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -314,6 +317,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..dfa69d1c7e
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Average dequeue time (config unused by this accessor) */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[0],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 3 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+/**
+ * Functional test 3: run enqueue/dequeue against a single PIE configuration
+ * starting from a non-zero queue length, and verify that both the computed
+ * drop probability and the measured drop rate remain exactly zero.
+ */
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	/* Ramp the queue up to tlevel[0] before measuring (i is 0 here) */
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): the loop bound is RTE_DIM(ft_tlevels) but this config
+	 * supplies ft3_tlevels, and pconfig[i] is indexed by the same counter
+	 * while only one configuration is set up -- confirm the array sizes
+	 * match, otherwise this indexes past the intended arrays.
+	 */
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		/* Under this load no PIE drops are expected at all */
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		/* prob and diff are never recomputed; printed as 0 placeholders */
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+/* PIE configuration used by performance test 1 (enqueue-only) */
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+/* Queue for performance test 1: enqueue path only (no pdata_out) */
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/*
+ * NOTE(review): pt_tconfig2 is field-for-field identical to pt_tconfig;
+ * the two could be shared unless they are meant to diverge later.
+ */
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+/* Queue for performance test 2: also exercises dequeue via pdata_out */
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ *
+ * Runs num_ops enqueue (and, when pie_out is non-NULL, dequeue) operations,
+ * timing each call through the supplied rdtsc profiler and tallying the
+ * enqueued/dropped/dequeued counts into the caller's counters.
+ * Returns early (without touching counters) on NULL pie_cfg/pie_in.
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		/* 1000*sizeof(uint32_t) is the per-packet length in bytes
+		 * (4000 B) -- presumably an arbitrary fixed test size */
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		/* dequeue path is optional: perf test 1 passes pie_out == NULL */
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+/* Counters for performance test 1 (no dequeued counter: enqueue-only) */
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ * Initialises PIE, runs num_ops enqueue operations through the rdtsc
+ * profiler (dequeue disabled via NULL pie_out) and prints the resulting
+ * enqueue/drop percentages plus cycle statistics.
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof)			;
+
+	/* NOTE(review): if num_ops were ever 0, total would be 0 and the
+	 * percentage computation below would divide by zero */
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+/* Counters for performance test 2; includes dequeued (dequeue path active) */
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ * Same as perf_test but with a dequeue target (pdata_out), so both enqueue
+ * and dequeue calls are timed and the dequeued count is reported.
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	/* NOTE(review): the profiler label says "enqueue" even though
+	 * dequeue calls are folded into the same profile here -- confirm
+	 * whether a combined label (or a second profiler) was intended */
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ * (subset run by the pie_autotest command)
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ * (full functional set, run by pie_all)
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+/* performance tests, run by pie_perf and pie_all */
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * Execute each entry of a test table, tallying total runs and passes.
+ * Prints a separator bar before each test and a pass/fail bar after it.
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+	uint32_t idx;
+
+	for (idx = 0; idx < test_count; idx++) {
+		enum test_result result;
+
+		printf("\n%s\n", bar_str);
+		result = test_type[idx].testfn(test_type[idx].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+			printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ *
+ * Returns 0 when every invalid call is rejected, -1 on the first one that
+ * is wrongly accepted. Fix: the log format was "%i%s\n", which ran the
+ * line number straight into the message; use "%i: %s\n" everywhere, as the
+ * first check already did.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL runtime data */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref == 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval == 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst == 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th == 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i: %s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+/**
+ * Print the run summary: totals and passes, plus a fail count when not
+ * everything passed.
+ */
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	const uint32_t num_fail = num_tests - num_pass;
+
+	if (num_fail == 0)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_fail);
+}
+
+/**
+ * Translate the pass/total tally into a test exit code: 0 when everything
+ * passed, 1 otherwise.
+ */
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		return 0;
+	return 1;
+}
+
+/**
+ * Entry point for the pie_autotest command: parameter validation plus the
+ * quick functional tests. Returns -1 if parameter validation fails,
+ * otherwise 0 when all tests pass and 1 when any test fails.
+ */
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/**
+ * Entry point for the pie_perf command: performance tests only
+ * (no parameter validation). Returns 0 when all pass, 1 otherwise.
+ */
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+/**
+ * Entry point for the pie_all command: parameter validation, the full
+ * functional suite and the performance suite. Returns -1 if parameter
+ * validation fails, otherwise 0 when all tests pass and 1 when any fails.
+ */
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..7c37b78804 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
--------
+Droppers
+--------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+The calculation considers not only the current latency sample but also whether the latency is trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: bytes              |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+This is the format in which these parameters are passed to the dropper module API.
+Applications may also calculate them at run time for fine tuning.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
@@ -1396,7 +1440,7 @@ As can be seen, the floating-point implementation achieved the worst performance
    | Method                                                                             | Relative Performance |
    |                                                                                    |                      |
    +====================================================================================+======================+
-   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Dropper>`)                   | 100%                 |
+   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Droppers>`)                  | 100%                 |
    |                                                                                    |                      |
    +------------------------------------------------------------------------------------+----------------------+
    | Fixed-point method with small (512B) look-up table                                 | 148%                 |
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. Calculated drop probability is updated periodically,
+based on latency measured and desired and whether the queuing latency is currently
+trending up or down. Queuing latency can be obtained using direct measurement or
+on estimations calculated from the queue length and dequeue rate. The random drop
+is triggered by a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index 8ced4547aa..df75db51ed 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -7,11 +7,12 @@ if is_windows
     subdir_done()
 endif
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
 headers = files(
         'rte_approx.h',
         'rte_red.h',
         'rte_sched.h',
         'rte_sched_common.h',
+        'rte_pie.h',
 )
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..934e9aee50
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+/**
+ * @brief Initialize (reset) the PIE run-time data of a queue.
+ *
+ * @param pie [in,out] pointer to caller-owned run-time data; must not be NULL
+ *
+ * @return 0 on success, -1 when @pie is NULL
+ *
+ * Fix: the previous code rte_malloc'ed a new rte_pie into the local
+ * pointer when @pie was NULL and then returned -1 anyway -- the caller's
+ * pointer cannot be updated through a by-value parameter, so the
+ * allocation was pointless and leaked. Simply reject NULL.
+ */
+int
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		RTE_LOG(ERR, SCHED, "%s: Invalid pie pointer\n", __func__);
+		return -1;
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+
+	return 0;
+}
+
+/**
+ * @brief Initialize a PIE configuration, converting millisecond parameters
+ * into TSC cycles for run-time comparisons.
+ *
+ * @param pie_cfg [out] configuration to fill; must not be NULL
+ * @param qdelay_ref [in] latency target (ms); must be nonzero
+ * @param dp_update_interval [in] drop-probability update interval (ms); nonzero
+ * @param max_burst [in] max burst allowance (ms); must be nonzero
+ * @param tailq_th [in] tail-drop threshold (packets); must be nonzero
+ *
+ * @return 0 on success, -1 for NULL @pie_cfg, -EINVAL for a bad parameter
+ *
+ * Fix: the parameters are unsigned (uint16_t), so the previous "<= 0"
+ * checks were tautological "== 0" tests that trigger -Wtype-limits
+ * warnings; make the zero checks explicit.
+ */
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	/* ms -> CPU cycles; tailq_th stays in packet units */
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..68f1b96192
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data, one instance per queue.
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (in cpu cycles; it is
+					 * assigned from the uint64_t max_burst config field.
+					 * NOTE(review): check for uint32_t truncation and
+					 * the previous "bytes" unit comment).
+					 */
+	uint64_t qdelay_old;           /**< Queue delay from the previous update (in cpu
+					 * cycles; assigned from a cycle-based delay
+					 * estimate, not bytes as previously stated).
+					 */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet (never drops on an empty queue)
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	/* pkt_len is an integer, not a pointer: the previous assert
+	 * compared it against NULL.  Validate the pointer argument.
+	 */
+	RTE_ASSERT(pie != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* If the queue has been idle for a while (occupancy below 10%
+	 * of the tail-drop threshold), turn off PIE and reset counters.
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Periodic update of the packet drop probability
+ *
+ * Recomputes drop_prob from the current and previous queue delay
+ * estimates (PIE PI controller, see RFC 8033) and decrements the
+ * burst allowance.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
+
+	/* The deltas must be signed: with unsigned arithmetic a delay
+	 * below the target wrapped to a huge positive value, driving
+	 * drop_prob up instead of down.
+	 */
+	int64_t qdelay_diff_ref = (int64_t)current_qdelay - (int64_t)qdelay_ref;
+	int64_t qdelay_diff_old = (int64_t)current_qdelay - (int64_t)pie->qdelay_old;
+
+	double p = RTE_ALPHA * qdelay_diff_ref + RTE_BETA * qdelay_diff_old;
+
+	/* Auto-tuning: scale the correction down while the drop
+	 * probability is still small (RFC 8033, section 5.2).
+	 */
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	/* Cap the per-update increment once the probability is large */
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Decrement burst allowance, saturating at zero.  The previous
+	 * unsigned subtraction wrapped around whenever
+	 * dp_update_interval exceeded the remaining allowance, making
+	 * the "> 0" test always true and the allowance huge.
+	 */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief Decide whether to drop a packet, based on the current drop
+ *        probability (random drop, RFC 8033 section 4.1)
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Draw a uniform random number in [0, 1].  The previous integer
+	 * division rte_rand()/RTE_RAND_MAX truncated to 0 for every
+	 * draw except rand == RTE_RAND_MAX, so any packet reaching this
+	 * point with drop_prob > 0 was unconditionally dropped.
+	 */
+	rand_value = (double)rte_rand() / (double)RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if a new packet should be enqueued or dropped for a non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued.
+		 * NOTE(review): _rte_pie_drop() is evaluated (and may
+		 * mutate accu_prob) even while the burst allowance is
+		 * non-zero and its verdict is then ignored — confirm
+		 * this ordering is intended.
+		 */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy rises above 10% of the tail-drop
+	 * threshold, turn on PIE and reset all run-time state.
+	 */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* When the queue has drained below 10% of the tail-drop
+	 * threshold, turn off PIE and reset counters.
+	 */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if a new packet should be enqueued or dropped
+ *
+ * Top-level enqueue entry point: updates run-time data and gives the
+ * verdict, dispatching to the empty-queue or non-empty-queue handler.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	/* An empty queue is handled by the simpler, never-drop path */
+	if (qlen == 0)
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+
+	return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure; maintains the average dequeue time
+ * (EWMA with weight RTE_DQ_WEIGHT) used as the queue delay estimate.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* End the current measurement cycle once at least
+		 * RTE_DQ_THRESHOLD bytes have departed (the original
+		 * comment said "start", but this branch finishes it).
+		 */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			/* Seed the average on the first cycle, EWMA after */
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..2fe32bbd33 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,15 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	bool cman_enabled;
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1089,90 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+/* Initialise the per-TC, per-colour RED configuration of subport s
+ * from params->cman_params->red_params and record RED as the active
+ * CMAN mode.  A TC/colour pair with both thresholds zero keeps RED
+ * disabled for that pair.  On failure the port's subport memory is
+ * released via rte_sched_free_memory().
+ */
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_RED;
+	return 0;
+}
+
+/* Initialise the per-TC PIE configuration of subport s from
+ * params->cman_params->pie_params and record PIE as the active CMAN
+ * mode.  On any failure the port's subport memory is released,
+ * mirroring rte_sched_red_config().
+ */
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		/* The tail-drop threshold cannot exceed the queue size */
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			/* Free the allocated memory on this error path
+			 * too, consistent with the config-init failure
+			 * below and with rte_sched_red_config(); the
+			 * previous code leaked here.
+			 */
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+/* Dispatch congestion-management initialisation of subport s to the
+ * handler matching the configured CMAN mode (RED or PIE).
+ * Returns -EINVAL for an unknown mode.
+ */
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	switch (params->cman_params->cman_mode) {
+	case RTE_SCHED_CMAN_RED:
+		return rte_sched_red_config(port, s, params, n_subports);
+	case RTE_SCHED_CMAN_PIE:
+		return rte_sched_pie_config(port, s, params, n_subports);
+	default:
+		return -EINVAL;
+	}
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1262,17 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
+#ifdef RTE_SCHED_CMAN
+		if (params->cman_params != NULL) {
+			s->cman_enabled = true;
+			status = rte_sched_cman_config(port, s, params, n_subports);
+			if (status) {
+				RTE_LOG(NOTICE, SCHED,
+					"%s: CMAN configuration fails\n", __func__);
+				return status;
 			}
+		} else {
+			s->cman_enabled = false;
 		}
 #endif
 
@@ -1718,30 +1801,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1828,99 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	if (subport->cman_enabled)
+		qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
+	if (!subport->cman_enabled)
+		return 0;
+
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
+	qe = subport->queue_extra + qindex;
 
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
 
-	qe = subport->queue_extra + qindex;
-	red = &qe->red;
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
-	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman_enabled) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		if (subport->cman == RTE_SCHED_CMAN_RED) {
+			struct rte_red *red = &qe->red;
+
+			rte_red_mark_queue_empty(red, port->time);
+		}
+	}
+}
 
-	rte_red_mark_queue_empty(red, port->time);
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman_enabled && subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1929,17 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
+	uint32_t qindex __rte_unused,
+	uint32_t pkt_len __rte_unused,
+	uint64_t time __rte_unused) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
+
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2035,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2508,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2528,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index cb851301e9..9727701fe3 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,10 +61,9 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
 #include "rte_red.h"
-#endif
+#include "rte_pie.h"
 
 /** Maximum number of queues per pipe.
  * Note that the multiple queues (power of 2) can only be assigned to
@@ -110,6 +109,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
+	RTE_SCHED_CMAN_PIE,  /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -139,6 +160,22 @@ struct rte_sched_pipe_params {
 	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 };
 
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+
 /*
  * Subport configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,10 +211,11 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
-#endif
+	/** Congestion Management parameters
+	 * If NULL the congestion management is disabled for the subport,
+	 * otherwise proper parameters need to be provided.
+	 */
+	struct rte_sched_cman_params *cman_params;
 };
 
 struct rte_sched_subport_profile_params {
@@ -208,10 +246,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +258,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index a6e505c8ac..d22c07fc9f 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,8 @@ EXPERIMENTAL {
 
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v23 2/3] examples/qos_sched: add PIE support
  2021-11-04 11:03                                             ` [dpdk-dev] [PATCH v23 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-11-04 11:03                                               ` Liguzinski, WojciechX
  2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 3/3] examples/ip_pipeline: " Liguzinski, WojciechX
  2 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 11:03 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

This patch adds support for enabling either PIE or RED as the
congestion management scheme, selected by parsing the config file.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 config/rte_config.h            |   1 -
 examples/qos_sched/cfg_file.c  | 127 +++++++++++++++------
 examples/qos_sched/cfg_file.h  |   5 +
 examples/qos_sched/init.c      |  27 +++--
 examples/qos_sched/main.h      |   3 +
 examples/qos_sched/profile.cfg | 196 ++++++++++++++++++++++-----------
 6 files changed, 250 insertions(+), 109 deletions(-)

diff --git a/config/rte_config.h b/config/rte_config.h
index 1a66b42fcc..6ec687a555 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,6 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..450482f07d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+/* Copy congestion-management parameters (RED per TC/colour, or PIE
+ * per TC, selected by cman_p.cman_mode) into the subport parameter
+ * structure.
+ * NOTE(review): assumes subport_p->cman_params already points to
+ * allocated storage — confirm against callers before use.
+ */
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		/* RED: copy thresholds and weights for every colour */
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_RED) {
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			/* PIE: copy latency target, update interval,
+			 * burst allowance and tail-drop threshold
+			 */
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -242,25 +276,26 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_RED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_RED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
-			/* Parse WRED min thresholds */
-			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			/* Parse RED min thresholds */
+			snprintf(str, sizeof(str), "tc %d red min", i);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -268,14 +303,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED max thresholds */
-			snprintf(str, sizeof(str), "tc %d wred max", i);
+			/* Parse RED max thresholds */
+			snprintf(str, sizeof(str), "tc %d red max", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -283,14 +318,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED inverse mark probabilities */
-			snprintf(str, sizeof(str), "tc %d wred inv prob", i);
+			/* Parse RED inverse mark probabilities */
+			snprintf(str, sizeof(str), "tc %d red inv prob", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -299,14 +334,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED EWMA filter weights */
-			snprintf(str, sizeof(str), "tc %d wred weight", i);
+			/* Parse RED EWMA filter weights */
+			snprintf(str, sizeof(str), "tc %d red weight", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,7 +350,44 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,19 +465,8 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+#ifdef RTE_SCHED_CMAN
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 9b34e4a76b..3c1f0bc680 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -203,15 +203,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_RED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -278,7 +272,20 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v23 3/3] examples/ip_pipeline: add PIE support
  2021-11-04 11:03                                             ` [dpdk-dev] [PATCH v23 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
  2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
  2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 2/3] examples/qos_sched: add PIE support Liguzinski, WojciechX
@ 2021-11-04 11:03                                               ` Liguzinski, WojciechX
  2 siblings, 0 replies; 178+ messages in thread
From: Liguzinski, WojciechX @ 2021-11-04 11:03 UTC (permalink / raw)
  To: dev, jasvinder.singh, cristian.dumitrescu
  Cc: megha.ajmera, Wojciech Liguzinski

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Adding the PIE support for IP Pipeline

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 examples/ip_pipeline/tmgr.c | 142 +++++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -25,74 +96,9 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
-#endif /* RTE_SCHED_RED */
+#ifdef RTE_SCHED_CMAN
+	.cman_params = &cman_params,
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
-- 
2.25.1


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v23 1/3] sched: add PIE based congestion management
  2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
@ 2021-11-04 13:58                                                 ` Thomas Monjalon
  2021-11-04 14:24                                                   ` Dumitrescu, Cristian
  0 siblings, 1 reply; 178+ messages in thread
From: Thomas Monjalon @ 2021-11-04 13:58 UTC (permalink / raw)
  To: jasvinder.singh, cristian.dumitrescu, Wojciech Liguzinski
  Cc: dev, megha.ajmera, Liguzinski, WojciechX

04/11/2021 12:03, Liguzinski, WojciechX:
> From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
> 
> Implement PIE based congestion management based on rfc8033.
> 
> The Proportional Integral Controller Enhanced (PIE) algorithm works
> by proactively dropping packets randomly.
> PIE is implemented as more advanced queue management is required to
> address the bufferbloat problem and provide desirable quality of
> service to users.
> 
> Tests for PIE code added to test application.
> Added PIE related information to documentation.
> 
> Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
> Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
> 
> --

It should be 3 dashes to make the below hidden in git.

> Changes in V23:
> - Coding style fixed
> 
> Changes in V22:
> - Coding style fixed
> 
> Changes in V21:
> - Coding style fixed
> - Patches reorganized according to comments
> 
> Changes in V20:
> - Removed API conditional compilation
> - Added flag to indicate cman enabled/disabled
> - Fixed submitter data in patches
> 
> Changes in V19:
> - ACKs included in patches
> 
> Changes in V18:
> - Resolved merge conflict in lib/sched/meson.build after rebasing ontop of main
> - Reverted whitespace change in app_thread.c - comment from Stephen Hemminger
> 
> Changes in V17:
> - Corrected paragraph link naming in qos_framework.rst to fix CI builds
> 
> Changes in V16:
> - Fixed 'title underline too short' error in qos_framework.rst
> - Applied __rte_unused macro to parameters in rte_sched_port_pie_dequeue()
> 
> ---
>  app/test/meson.build                         |    4 +
>  app/test/test_pie.c                          | 1065 ++++++++++++++++++
>  doc/guides/prog_guide/glossary.rst           |    3 +
>  doc/guides/prog_guide/qos_framework.rst      |   64 +-
>  doc/guides/prog_guide/traffic_management.rst |   13 +-
>  drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
>  lib/sched/meson.build                        |    3 +-
>  lib/sched/rte_pie.c                          |   86 ++
>  lib/sched/rte_pie.h                          |  398 +++++++
>  lib/sched/rte_sched.c                        |  254 +++--
>  lib/sched/rte_sched.h                        |   64 +-
>  lib/sched/version.map                        |    4 +
>  12 files changed, 1860 insertions(+), 104 deletions(-)
>  create mode 100644 app/test/test_pie.c
>  create mode 100644 lib/sched/rte_pie.c
>  create mode 100644 lib/sched/rte_pie.h

Adding this change to include the new test in sched library maintainership:

--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1428,6 +1428,7 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
 M: Jasvinder Singh <jasvinder.singh@intel.com>
 F: lib/sched/
 F: doc/guides/prog_guide/qos_framework.rst
+F: app/test/test_pie.c
 F: app/test/test_red.c
 F: app/test/test_sched.c
 F: examples/qos_sched/

Cristian, Jasvinder, I didn't see a lot of comments from you on this patch,
so I assume you are OK with this change.



^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v23 1/3] sched: add PIE based congestion management
  2021-11-04 13:58                                                 ` Thomas Monjalon
@ 2021-11-04 14:24                                                   ` Dumitrescu, Cristian
  0 siblings, 0 replies; 178+ messages in thread
From: Dumitrescu, Cristian @ 2021-11-04 14:24 UTC (permalink / raw)
  To: Thomas Monjalon, Singh, Jasvinder, Liguzinski, WojciechX
  Cc: dev, Ajmera, Megha, Liguzinski, WojciechX



> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Thursday, November 4, 2021 1:58 PM
> To: Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian
> <cristian.dumitrescu@intel.com>; Liguzinski, WojciechX
> <wojciechx.liguzinski@intel.com>
> Cc: dev@dpdk.org; Ajmera, Megha <megha.ajmera@intel.com>; Liguzinski,
> WojciechX <wojciechx.liguzinski@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v23 1/3] sched: add PIE based congestion
> management
> 
> 04/11/2021 12:03, Liguzinski, WojciechX:
> > From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
> >
> > Implement PIE based congestion management based on rfc8033.
> >
> > The Proportional Integral Controller Enhanced (PIE) algorithm works
> > by proactively dropping packets randomly.
> > PIE is implemented as more advanced queue management is required to
> > address the bufferbloat problem and provide desirable quality of
> > service to users.
> >
> > Tests for PIE code added to test application.
> > Added PIE related information to documentation.
> >
> > Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
> > Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> > Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
> >
> > --
> 
> It should be 3 dashes to make the below hidden in git.
> 
> > Changes in V23:
> > - Coding style fixed
> >
> > Changes in V22:
> > - Coding style fixed
> >
> > Changes in V21:
> > - Coding style fixed
> > - Patches reorganized according to comments
> >
> > Changes in V20:
> > - Removed API conditional compilation
> > - Added flag to indicate cman enabled/disabled
> > - Fixed submitter data in patches
> >
> > Changes in V19:
> > - ACKs included in patches
> >
> > Changes in V18:
> > - Resolved merge conflict in lib/sched/meson.build after rebasing ontop of
> main
> > - Reverted whitespace change in app_thread.c - comment from Stephen
> Hemminger
> >
> > Changes in V17:
> > - Corrected paragraph link naming in qos_framework.rst to fix CI builds
> >
> > Changes in V16:
> > - Fixed 'title underline too short' error in qos_framework.rst
> > - Applied __rte_unused macro to parameters in
> rte_sched_port_pie_dequeue()
> >
> > ---
> >  app/test/meson.build                         |    4 +
> >  app/test/test_pie.c                          | 1065 ++++++++++++++++++
> >  doc/guides/prog_guide/glossary.rst           |    3 +
> >  doc/guides/prog_guide/qos_framework.rst      |   64 +-
> >  doc/guides/prog_guide/traffic_management.rst |   13 +-
> >  drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
> >  lib/sched/meson.build                        |    3 +-
> >  lib/sched/rte_pie.c                          |   86 ++
> >  lib/sched/rte_pie.h                          |  398 +++++++
> >  lib/sched/rte_sched.c                        |  254 +++--
> >  lib/sched/rte_sched.h                        |   64 +-
> >  lib/sched/version.map                        |    4 +
> >  12 files changed, 1860 insertions(+), 104 deletions(-)
> >  create mode 100644 app/test/test_pie.c
> >  create mode 100644 lib/sched/rte_pie.c
> >  create mode 100644 lib/sched/rte_pie.h
> 
> Adding this change to include the new test in sched library maintainership:
> 
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1428,6 +1428,7 @@ M: Cristian Dumitrescu
> <cristian.dumitrescu@intel.com>
>  M: Jasvinder Singh <jasvinder.singh@intel.com>
>  F: lib/sched/
>  F: doc/guides/prog_guide/qos_framework.rst
> +F: app/test/test_pie.c
>  F: app/test/test_red.c
>  F: app/test/test_sched.c
>  F: examples/qos_sched/
> 
> Cristian, Jasvinder, I didn't see a lot of comments from you on this patch,
> so I assume you are OK with this change.
> 

Hi Thomas,

Yes, I am.

Regards,
Cristian

^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library
  2021-11-04 10:49                                           ` [dpdk-dev] [PATCH v22 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
                                                               ` (3 preceding siblings ...)
  2021-11-04 11:03                                             ` [dpdk-dev] [PATCH v23 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
@ 2021-11-04 14:55                                             ` Thomas Monjalon
  2021-11-04 14:55                                               ` [dpdk-dev] [PATCH v24 1/3] sched: add PIE based congestion management Thomas Monjalon
                                                                 ` (3 more replies)
  4 siblings, 4 replies; 178+ messages in thread
From: Thomas Monjalon @ 2021-11-04 14:55 UTC (permalink / raw)
  To: dev; +Cc: megha.ajmera

last changes to make this series "more acceptable":
- RTE_SCHED_CMAN in rte_config.h, replacing RTE_SCHED_RED
- test file listed in MAINTAINERS
- few whitespaces fixed


From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

The DPDK sched library is equipped with a mechanism that secures it from the bufferbloat
problem, a situation in which excess buffers in the network cause high latency and latency
variation. Currently, it supports RED for active queue management. However, more
advanced queue management is required to address this problem and provide the desirable
quality of service to users.

This solution (RFC) proposes usage of a new algorithm called "PIE" (Proportional Integral
controller Enhanced) that can effectively and directly control queuing latency to address
the bufferbloat problem.

The implementation of the mentioned functionality includes modifying existing data
structures and adding a new set of data structures to the library, as well as adding
PIE-related APIs. This affects structures in the public API/ABI. That is why a
deprecation notice is going to be prepared and sent.

Wojciech Liguzinski (3):
  sched: add PIE based congestion management
  examples/qos_sched: support PIE congestion management
  examples/ip_pipeline: support PIE congestion management

 MAINTAINERS                                  |    1 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    2 +-
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |  142 +--
 examples/qos_sched/cfg_file.c                |  127 ++-
 examples/qos_sched/cfg_file.h                |    5 +
 examples/qos_sched/init.c                    |   27 +-
 examples/qos_sched/main.h                    |    3 +
 examples/qos_sched/profile.cfg               |  196 ++--
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  396 +++++++
 lib/sched/rte_sched.c                        |  256 +++--
 lib/sched/rte_sched.h                        |   64 +-
 lib/sched/version.map                        |    4 +
 20 files changed, 2185 insertions(+), 282 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

-- 
2.33.0


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v24 1/3] sched: add PIE based congestion management
  2021-11-04 14:55                                             ` [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library Thomas Monjalon
@ 2021-11-04 14:55                                               ` Thomas Monjalon
  2021-11-04 14:55                                               ` [dpdk-dev] [PATCH v24 2/3] examples/qos_sched: support PIE " Thomas Monjalon
                                                                 ` (2 subsequent siblings)
  3 siblings, 0 replies; 178+ messages in thread
From: Thomas Monjalon @ 2021-11-04 14:55 UTC (permalink / raw)
  To: dev
  Cc: megha.ajmera, Wojciech Liguzinski, Cristian Dumitrescu,
	Jasvinder Singh, Bruce Richardson, Ray Kinsella

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Implement PIE-based congestion management based on RFC 8033.

The Proportional Integral Controller Enhanced (PIE) algorithm works
by proactively dropping packets randomly.
PIE is implemented as more advanced queue management is required to
address the bufferbloat problem and provide desirable quality of
service to users.

Tests for PIE code added to test application.
Added PIE related information to documentation.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 MAINTAINERS                                  |    1 +
 app/test/meson.build                         |    4 +
 app/test/test_pie.c                          | 1065 ++++++++++++++++++
 config/rte_config.h                          |    2 +-
 doc/guides/prog_guide/glossary.rst           |    3 +
 doc/guides/prog_guide/qos_framework.rst      |   64 +-
 doc/guides/prog_guide/traffic_management.rst |   13 +-
 drivers/net/softnic/rte_eth_softnic_tm.c     |    6 +-
 examples/ip_pipeline/tmgr.c                  |    4 +-
 examples/qos_sched/cfg_file.c                |    6 +-
 examples/qos_sched/init.c                    |    4 +-
 lib/sched/meson.build                        |    3 +-
 lib/sched/rte_pie.c                          |   86 ++
 lib/sched/rte_pie.h                          |  396 +++++++
 lib/sched/rte_sched.c                        |  256 +++--
 lib/sched/rte_sched.h                        |   64 +-
 lib/sched/version.map                        |    4 +
 17 files changed, 1868 insertions(+), 113 deletions(-)
 create mode 100644 app/test/test_pie.c
 create mode 100644 lib/sched/rte_pie.c
 create mode 100644 lib/sched/rte_pie.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 0e5951f8f1..2629960df0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1428,6 +1428,7 @@ M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
 M: Jasvinder Singh <jasvinder.singh@intel.com>
 F: lib/sched/
 F: doc/guides/prog_guide/qos_framework.rst
+F: app/test/test_pie.c
 F: app/test/test_red.c
 F: app/test/test_sched.c
 F: examples/qos_sched/
diff --git a/app/test/meson.build b/app/test/meson.build
index 0c46cb064e..96670c3504 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -115,6 +115,7 @@ test_sources = files(
         'test_reciprocal_division.c',
         'test_reciprocal_division_perf.c',
         'test_red.c',
+        'test_pie.c',
         'test_reorder.c',
         'test_rib.c',
         'test_rib6.c',
@@ -250,6 +251,7 @@ fast_tests = [
         ['prefetch_autotest', true],
         ['rcu_qsbr_autotest', true],
         ['red_autotest', true],
+        ['pie_autotest', true],
         ['rib_autotest', true],
         ['rib6_autotest', true],
         ['ring_autotest', true],
@@ -301,6 +303,7 @@ perf_test_names = [
         'fib_slow_autotest',
         'fib_perf_autotest',
         'red_all',
+        'pie_all',
         'barrier_autotest',
         'hash_multiwriter_autotest',
         'timer_racecond_autotest',
@@ -314,6 +317,7 @@ perf_test_names = [
         'fib6_perf_autotest',
         'rcu_qsbr_perf_autotest',
         'red_perf',
+        'pie_perf',
         'distributor_perf_autotest',
         'pmd_perf_autotest',
         'stack_perf_autotest',
diff --git a/app/test/test_pie.c b/app/test/test_pie.c
new file mode 100644
index 0000000000..632d4b014d
--- /dev/null
+++ b/app/test/test_pie.c
@@ -0,0 +1,1065 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#include "test.h"
+
+#include <rte_pie.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259)       /* conversion may lose significant bits */
+#pragma warning(disable:181)        /* Arg incompatible with format string */
+#endif
+
+/**< structures for testing rte_pie performance and function */
+struct test_rte_pie_config {        /**< Test structure for RTE_PIE config */
+	struct rte_pie_config *pconfig; /**< RTE_PIE configuration parameters */
+	uint8_t num_cfg;                /**< Number of RTE_PIE configs to test */
+	uint16_t qdelay_ref;            /**< Latency Target (milliseconds) */
+	uint16_t *dp_update_interval;   /**< Update interval for drop probability
+					  * (milliseconds)
+					  */
+	uint16_t *max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;              /**< Tailq drop threshold (packet counts) */
+};
+
+struct test_queue {                 /**< Test structure for RTE_PIE Queues */
+	struct rte_pie *pdata_in;       /**< RTE_PIE runtime data input */
+	struct rte_pie *pdata_out;		/**< RTE_PIE runtime data output*/
+	uint32_t num_queues;            /**< Number of RTE_PIE queues to test */
+	uint32_t *qlen;                 /**< Queue size */
+	uint32_t q_ramp_up;             /**< Num of enqueues to ramp up the queue */
+	double drop_tolerance;          /**< Drop tolerance of packets not enqueued */
+};
+
+struct test_var {                   /**< Test variables used for testing RTE_PIE */
+	uint32_t num_iterations;        /**< Number of test iterations */
+	uint32_t num_ops;               /**< Number of test operations */
+	uint64_t clk_freq;              /**< CPU clock frequency */
+	uint32_t *dropped;              /**< Test operations dropped */
+	uint32_t *enqueued;             /**< Test operations enqueued */
+	uint32_t *dequeued;             /**< Test operations dequeued */
+};
+
+struct test_config {                /**< Primary test structure for RTE_PIE */
+	const char *ifname;             /**< Interface name */
+	const char *msg;                /**< Test message for display */
+	const char *htxt;               /**< Header txt display for result output */
+	struct test_rte_pie_config *tconfig; /**< Test structure for RTE_PIE config */
+	struct test_queue *tqueue;      /**< Test structure for RTE_PIE Queues */
+	struct test_var *tvar;          /**< Test variables used for testing RTE_PIE */
+	uint32_t *tlevel;               /**< Queue levels */
+};
+
+enum test_result {
+	FAIL = 0,
+	PASS
+};
+
+/**< Test structure to define tests to run */
+struct tests {
+	struct test_config *testcfg;
+	enum test_result (*testfn)(struct test_config *cfg);
+};
+
+struct rdtsc_prof {
+	uint64_t clk_start;
+	uint64_t clk_min;               /**< min clocks */
+	uint64_t clk_max;               /**< max clocks */
+	uint64_t clk_avgc;              /**< count to calc average */
+	double clk_avg;                 /**< cumulative sum to calc average */
+	const char *name;
+};
+
+static const uint64_t port_speed_bytes = (10ULL*1000ULL*1000ULL*1000ULL)/8ULL;
+static double inv_cycles_per_byte;
+
+static void init_port_ts(uint64_t cpu_clock)
+{
+	double cycles_per_byte = (double)(cpu_clock) / (double)(port_speed_bytes);
+	inv_cycles_per_byte = 1.0 / cycles_per_byte;
+}
+
+static uint64_t get_port_ts(void)
+{
+	return (uint64_t)((double)rte_rdtsc() * inv_cycles_per_byte);
+}
+
+static void rdtsc_prof_init(struct rdtsc_prof *p, const char *name)
+{
+	p->clk_min = (uint64_t)(-1LL);
+	p->clk_max = 0;
+	p->clk_avg = 0;
+	p->clk_avgc = 0;
+	p->name = name;
+}
+
+static inline void rdtsc_prof_start(struct rdtsc_prof *p)
+{
+	p->clk_start = rte_rdtsc_precise();
+}
+
+static inline void rdtsc_prof_end(struct rdtsc_prof *p)
+{
+	uint64_t clk_start = rte_rdtsc() - p->clk_start;
+
+	p->clk_avgc++;
+	p->clk_avg += (double) clk_start;
+
+	if (clk_start > p->clk_max)
+		p->clk_max = clk_start;
+	if (clk_start < p->clk_min)
+		p->clk_min = clk_start;
+}
+
+static void rdtsc_prof_print(struct rdtsc_prof *p)
+{
+	if (p->clk_avgc > 0) {
+		printf("RDTSC stats for %s: n=%" PRIu64 ", min=%" PRIu64
+						",max=%" PRIu64 ", avg=%.1f\n",
+			p->name,
+			p->clk_avgc,
+			p->clk_min,
+			p->clk_max,
+			(p->clk_avg / ((double) p->clk_avgc)));
+	}
+}
+
+static uint16_t rte_pie_get_active(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	return pie->active;
+}
+
+static void rte_pie_set_active(const struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint16_t active)
+{
+    /**< Flag for activating/deactivating pie */
+	RTE_SET_USED(pie_cfg);
+	pie->active = active;
+}
+
+/**
+ * Read the drop probability
+ */
+static double rte_pie_get_drop_prob(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->drop_prob;
+}
+
+static double rte_pie_get_avg_dq_time(const struct rte_pie_config *pie_cfg,
+				    struct rte_pie *pie)
+{
+    /**< Current packet drop probability */
+	RTE_SET_USED(pie_cfg);
+	return pie->avg_dq_time;
+}
+
+static double calc_drop_rate(uint32_t enqueued, uint32_t dropped)
+{
+	return (double)dropped / ((double)enqueued + (double)dropped);
+}
+
+/**
+ *  check if drop rate matches drop probability within tolerance
+ */
+static int check_drop_rate(double *diff, double drop_rate, double drop_prob,
+							double tolerance)
+{
+	double abs_diff = 0.0;
+	int ret = 1;
+
+	abs_diff = fabs(drop_rate - drop_prob);
+	if ((int)abs_diff == 0) {
+		*diff = 0.0;
+	} else {
+		*diff = (abs_diff / drop_prob) * 100.0;
+		if (*diff > tolerance)
+			ret = 0;
+	}
+	return ret;
+}
+
+/**
+ * initialize the test rte_pie config
+ */
+static enum test_result
+test_rte_pie_init(struct test_config *tcfg)
+{
+	unsigned int i = 0;
+
+	tcfg->tvar->clk_freq = rte_get_timer_hz();
+	init_port_ts(tcfg->tvar->clk_freq);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		if (rte_pie_config_init(&tcfg->tconfig->pconfig[i],
+					(uint16_t)tcfg->tconfig->qdelay_ref,
+					(uint16_t)tcfg->tconfig->dp_update_interval[i],
+					(uint16_t)tcfg->tconfig->max_burst[i],
+					(uint16_t)tcfg->tconfig->tailq_th) != 0) {
+			return FAIL;
+		}
+	}
+
+	*tcfg->tqueue->qlen = 0;
+	*tcfg->tvar->dropped = 0;
+	*tcfg->tvar->enqueued = 0;
+
+	return PASS;
+}
+
+/**
+ * enqueue until actual queue size reaches target level
+ */
+static int
+increase_qsize(struct rte_pie_config *pie_cfg,
+				struct rte_pie *pie,
+				uint32_t *qlen,
+				uint32_t pkt_len,
+				uint32_t attempts)
+{
+	uint32_t i = 0;
+
+		for (i = 0; i < attempts; i++) {
+			int ret = 0;
+
+			/**
+			 * enqueue
+			 */
+			ret = rte_pie_enqueue(pie_cfg, pie, *qlen, pkt_len, get_port_ts());
+			/**
+			 * check if target actual queue size has been reached
+			 */
+			if (ret == 0)
+				return 0;
+		}
+		/**
+		 * no success
+		 */
+		return -1;
+}
+
+/**
+ * functional test enqueue/dequeue packets
+ */
+static void
+enqueue_dequeue_func(struct rte_pie_config *pie_cfg,
+					struct rte_pie *pie,
+					uint32_t *qlen,
+					uint32_t num_ops,
+					uint32_t *enqueued,
+					uint32_t *dropped)
+{
+	uint32_t i = 0;
+
+	for (i = 0; i < num_ops; i++) {
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ret = rte_pie_enqueue(pie_cfg, pie, *qlen, sizeof(uint32_t),
+							get_port_ts());
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+	}
+}
+
+/**
+ * setup default values for the Functional test structures
+ */
+static struct rte_pie_config ft_wpconfig[1];
+static struct rte_pie ft_rtdata[1];
+static uint32_t  ft_q[] = {0};
+static uint32_t  ft_dropped[] = {0};
+static uint32_t  ft_enqueued[] = {0};
+static uint16_t ft_max_burst[] = {64};
+static uint16_t ft_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config ft_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft_q,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F1: functional test 1
+ */
+static uint32_t ft_tlevels[] =  {6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66,
+				72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144};
+
+static struct test_config func_test_config1 = {
+	.ifname = "functional test interface",
+	.msg = "functional test : use one pie configuration\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft_tlevels,
+};
+
+static enum test_result func_test1(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * Test F2: functional test 2
+ */
+static uint32_t ft2_tlevel[] = {127};
+static uint16_t ft2_max_burst[] = {1, 2, 8, 16, 32, 64, 128, 256, 512, 1024};
+static uint16_t ft2_dp_update_interval[] = {
+				10, 20, 50, 150, 300, 600, 900, 1200, 1500, 3000};
+static struct rte_pie_config ft2_pconfig[10];
+
+static struct test_rte_pie_config ft2_tconfig =  {
+	.pconfig = ft2_pconfig,
+	.num_cfg = RTE_DIM(ft2_pconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft2_dp_update_interval,
+	.max_burst = ft2_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_config func_test_config2 = {
+	.ifname = "functional test 2 interface",
+	.msg = "functional test 2 : use several PIE configurations,\n"
+	"		    compare drop rate to drop probability\n\n",
+	.htxt = "PIE config     "
+	"avg queue size "
+	"enqueued       "
+	"dropped        "
+	"drop prob %    "
+	"drop rate %    "
+	"diff %         "
+	"tolerance %    "
+	"\n",
+	.tconfig = &ft2_tconfig,
+	.tqueue = &ft_tqueue,
+	.tvar = &ft_tvar,
+	.tlevel = ft2_tlevel,
+};
+
+static enum test_result func_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	printf("%s", tcfg->htxt);
+
+	for (i = 0; i < tcfg->tconfig->num_cfg; i++) {
+		uint32_t avg = 0;
+		double drop_rate = 0.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		if (test_rte_pie_init(tcfg) != PASS) {
+			result = FAIL;
+			goto out;
+		}
+
+		rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+		rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+		*tcfg->tvar->enqueued = 0;
+		*tcfg->tvar->dropped = 0;
+
+		if (increase_qsize(&tcfg->tconfig->pconfig[i],
+					tcfg->tqueue->pdata_in,
+					tcfg->tqueue->qlen,
+					*tcfg->tlevel,
+					tcfg->tqueue->q_ramp_up) != 0) {
+			result = FAIL;
+			goto out;
+		}
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		avg = rte_pie_get_avg_dq_time(NULL, tcfg->tqueue->pdata_in);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+							*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (!check_drop_rate(&diff, drop_rate, drop_prob,
+				 (double)tcfg->tqueue->drop_tolerance)) {
+			fprintf(stderr, "Fail: drop rate outside tolerance\n");
+			result = FAIL;
+		}
+
+		printf("%-15u%-15u%-15u%-15u%-15.4lf%-15.4lf%-15.4lf%-15.4lf\n",
+				i, avg, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance);
+	}
+out:
+	return result;
+}
+
+static uint32_t ft3_qlen[] = {100};
+
+static struct test_rte_pie_config ft3_tconfig =  {
+	.pconfig = ft_wpconfig,
+	.num_cfg = RTE_DIM(ft_wpconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = ft_dp_update_interval,
+	.max_burst = ft_max_burst,
+	.tailq_th = 15,
+};
+
+static struct test_queue ft3_tqueue = {
+	.pdata_in = ft_rtdata,
+	.num_queues = RTE_DIM(ft_rtdata),
+	.qlen = ft3_qlen,
+	.q_ramp_up = 10,
+	.drop_tolerance = 0,
+};
+
+static struct test_var ft3_tvar = {
+	.num_iterations = 0,
+	.num_ops = 10000,
+	.clk_freq = 0,
+	.dropped = ft_dropped,
+	.enqueued = ft_enqueued,
+};
+
+/**
+ * Test F3: functional test 3
+ */
+static uint32_t ft3_tlevels[] =  {64, 127, 222};
+
+static struct test_config func_test_config3 = {
+	.ifname = "functional test interface",
+	.msg = "functional test 2 : use one pie configuration\n"
+			"using non zero qlen\n\n",
+	.htxt = "                "
+	"drop probability "
+	"enqueued    "
+	"dropped     "
+	"drop prob % "
+	"drop rate % "
+	"diff %      "
+	"tolerance % "
+	"active  "
+	"\n",
+	.tconfig = &ft3_tconfig,
+	.tqueue = &ft3_tqueue,
+	.tvar = &ft3_tvar,
+	.tlevel = ft3_tlevels,
+};
+
+static enum test_result func_test3(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+
+	printf("%s", tcfg->msg);
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	printf("%s", tcfg->htxt);
+
+	/**
+	 * reset rte_pie run-time data
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	if (increase_qsize(&tcfg->tconfig->pconfig[i],
+				tcfg->tqueue->pdata_in,
+				tcfg->tqueue->qlen,
+				tcfg->tlevel[i],
+				tcfg->tqueue->q_ramp_up) != 0) {
+		fprintf(stderr, "Fail: increase qsize\n");
+		result = FAIL;
+		goto out;
+	}
+
+	for (i = 0; i < RTE_DIM(ft_tlevels); i++) {
+		const char *label = NULL;
+		uint16_t prob = 0;
+		uint16_t active = 0;
+		double drop_rate = 1.0;
+		double drop_prob = 0.0;
+		double diff = 0.0;
+
+		enqueue_dequeue_func(&tcfg->tconfig->pconfig[i],
+				     tcfg->tqueue->pdata_in,
+				     tcfg->tqueue->qlen,
+				     tcfg->tvar->num_ops,
+				     tcfg->tvar->enqueued,
+				     tcfg->tvar->dropped);
+
+		drop_rate = calc_drop_rate(*tcfg->tvar->enqueued,
+						*tcfg->tvar->dropped);
+		drop_prob = rte_pie_get_drop_prob(NULL, tcfg->tqueue->pdata_in);
+
+		if (drop_prob != 0) {
+			fprintf(stderr, "Fail: check drop prob\n");
+			result = FAIL;
+		}
+
+		if (drop_rate != 0) {
+			fprintf(stderr, "Fail: check drop rate\n");
+			result = FAIL;
+		}
+
+		label = "Summary           ";
+		active = rte_pie_get_active(NULL, tcfg->tqueue->pdata_in);
+		printf("%s%-16u%-12u%-12u%-12.4lf%-12.4lf%-12.4lf%-12.4lf%-8i\n",
+				label, prob, *tcfg->tvar->enqueued, *tcfg->tvar->dropped,
+				drop_prob * 100.0, drop_rate * 100.0, diff,
+				(double)tcfg->tqueue->drop_tolerance, active);
+	}
+out:
+	return result;
+}
+
+/**
+ * setup default values for the Performance test structures
+ */
+static struct rte_pie_config pt_wrconfig[1];
+static struct rte_pie pt_rtdata[1];
+static struct rte_pie pt_wtdata[1];
+static uint32_t pt_q[] = {0};
+static uint32_t pt_dropped[] = {0};
+static uint32_t pt_enqueued[] = {0};
+static uint32_t pt_dequeued[] = {0};
+static uint16_t pt_max_burst[] = {64};
+static uint16_t pt_dp_update_interval[] = {150};
+
+static struct test_rte_pie_config pt_tconfig =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue = {
+	.pdata_in = pt_rtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+static struct test_rte_pie_config pt_tconfig2 =  {
+	.pconfig = pt_wrconfig,
+	.num_cfg = RTE_DIM(pt_wrconfig),
+	.qdelay_ref = 15,
+	.dp_update_interval = pt_dp_update_interval,
+	.max_burst = pt_max_burst,
+	.tailq_th = 150,
+};
+
+static struct test_queue pt_tqueue2 = {
+	.pdata_in = pt_rtdata,
+	.pdata_out = pt_wtdata,
+	.num_queues = RTE_DIM(pt_rtdata),
+	.qlen = pt_q,
+	.q_ramp_up = 1000000,
+	.drop_tolerance = 0,  /* 0 percent */
+};
+
+/**
+ * enqueue/dequeue packets
+ * aka
+ *  rte_sched_port_enqueue(port, in_mbufs, 10);
+ *	rte_sched_port_dequeue(port, out_mbufs, 10);
+ */
+static void enqueue_dequeue_perf(struct rte_pie_config *pie_cfg,
+				 struct rte_pie *pie_in,
+				 struct rte_pie *pie_out,
+				 uint32_t *qlen,
+				 uint32_t num_ops,
+				 uint32_t *enqueued,
+				 uint32_t *dropped,
+				 uint32_t *dequeued,
+				 struct rdtsc_prof *prof)
+{
+	uint32_t i = 0;
+
+	if (pie_cfg == NULL) {
+		printf("%s: Error: PIE configuration cannot be empty.\n", __func__);
+		return;
+	}
+
+	if (pie_in == NULL) {
+		printf("%s: Error: PIE enqueue data cannot be empty.\n", __func__);
+		return;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		uint64_t ts = 0;
+		int ret = 0;
+
+		/**
+		 * enqueue
+		 */
+		ts = get_port_ts();
+		rdtsc_prof_start(prof);
+		ret = rte_pie_enqueue(pie_cfg, pie_in, *qlen,
+								1000*sizeof(uint32_t), ts);
+		rdtsc_prof_end(prof);
+
+		if (ret == 0)
+			(*enqueued)++;
+		else
+			(*dropped)++;
+
+		if (pie_out != NULL) {
+			ts = get_port_ts();
+			rdtsc_prof_start(prof);
+			rte_pie_dequeue(pie_out, 1000*sizeof(uint32_t), ts);
+			rdtsc_prof_end(prof);
+
+			(*dequeued)++;
+		}
+	}
+}
+
+/**
+ * Setup test structures for tests P1
+ * performance tests 1
+ */
+static uint32_t pt1_tlevel[] = {80};
+
+static struct test_var perf1_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued
+};
+
+static struct test_config perf_test_config = {
+	.ifname = "performance test 1 interface",
+	.msg = "performance test 1 : use one PIE configuration,\n"
+	"		     measure enqueue performance\n\n",
+	.tconfig = &pt_tconfig,
+	.tqueue = &pt_tqueue,
+	.tvar = &perf1_tvar,
+	.tlevel = pt1_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue performance.
+ *
+ */
+static enum test_result perf_test(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+			     tcfg->tqueue->pdata_in,
+				 NULL,
+			     tcfg->tqueue->qlen,
+			     tcfg->tvar->num_ops,
+			     tcfg->tvar->enqueued,
+			     tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+			     &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, enqueued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->enqueued,
+			((double)(*tcfg->tvar->enqueued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+
+
+/**
+ * Setup test structures for tests P2
+ * performance tests 2
+ */
+static uint32_t pt2_tlevel[] = {80};
+
+static struct test_var perf2_tvar = {
+	.num_iterations = 0,
+	.num_ops = 30000,
+	.clk_freq = 0,
+	.dropped = pt_dropped,
+	.enqueued = pt_enqueued,
+	.dequeued = pt_dequeued
+};
+
+static struct test_config perf_test_config2 = {
+	.ifname = "performance test 2 interface",
+	.msg = "performance test 2 : use one PIE configuration,\n"
+	"		     measure enqueue & dequeue performance\n\n",
+	.tconfig = &pt_tconfig2,
+	.tqueue = &pt_tqueue2,
+	.tvar = &perf2_tvar,
+	.tlevel = pt2_tlevel,
+};
+
+/**
+ * Performance test function to measure enqueue & dequeue performance.
+ *
+ */
+static enum test_result perf_test2(struct test_config *tcfg)
+{
+	enum test_result result = PASS;
+	struct rdtsc_prof prof = {0, 0, 0, 0, 0.0, NULL};
+	uint32_t total = 0;
+
+	printf("%s", tcfg->msg);
+
+	rdtsc_prof_init(&prof, "enqueue");
+
+	if (test_rte_pie_init(tcfg) != PASS) {
+		result = FAIL;
+		goto out;
+	}
+
+	/**
+	 * initialize the rte_pie run time data structure
+	 */
+	rte_pie_rt_data_init(tcfg->tqueue->pdata_in);
+	rte_pie_set_active(NULL, tcfg->tqueue->pdata_in, 1);
+	*tcfg->tvar->enqueued = 0;
+	*tcfg->tvar->dequeued = 0;
+	*tcfg->tvar->dropped = 0;
+
+	enqueue_dequeue_perf(tcfg->tconfig->pconfig,
+				 tcfg->tqueue->pdata_in,
+				 tcfg->tqueue->pdata_out,
+				 tcfg->tqueue->qlen,
+				 tcfg->tvar->num_ops,
+				 tcfg->tvar->enqueued,
+				 tcfg->tvar->dropped,
+				 tcfg->tvar->dequeued,
+				 &prof);
+
+	total = *tcfg->tvar->enqueued + *tcfg->tvar->dropped;
+
+	printf("\ntotal: %u, dequeued: %u (%.2lf%%), dropped: %u (%.2lf%%)\n",
+			total, *tcfg->tvar->dequeued,
+			((double)(*tcfg->tvar->dequeued) / (double)total) * 100.0,
+			*tcfg->tvar->dropped,
+			((double)(*tcfg->tvar->dropped) / (double)total) * 100.0);
+
+	rdtsc_prof_print(&prof);
+out:
+	return result;
+}
+
+/**
+ * define the functional tests to be executed fast
+ */
+struct tests func_pie_tests_quick[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+};
+
+/**
+ * define the functional and performance tests to be executed
+ */
+struct tests func_pie_tests[] = {
+	{ &func_test_config1, func_test1 },
+	{ &func_test_config2, func_test2 },
+	{ &func_test_config3, func_test3 },
+};
+
+struct tests perf_pie_tests[] = {
+	{ &perf_test_config, perf_test },
+	{ &perf_test_config2, perf_test2 },
+};
+
+/**
+ * function to execute the required pie tests
+ */
+static void run_tests(struct tests *test_type, uint32_t test_count,
+						uint32_t *num_tests, uint32_t *num_pass)
+{
+	enum test_result result = PASS;
+	uint32_t i = 0;
+	static const char *bar_str = "-------------------------------------"
+						"-------------------------------------------";
+	static const char *bar_pass_str = "-------------------------------------"
+						"<pass>-------------------------------------";
+	static const char *bar_fail_str = "-------------------------------------"
+						"<fail>-------------------------------------";
+
+	for (i = 0; i < test_count; i++) {
+		printf("\n%s\n", bar_str);
+		result = test_type[i].testfn(test_type[i].testcfg);
+		(*num_tests)++;
+		if (result == PASS) {
+			(*num_pass)++;
+				printf("%s\n", bar_pass_str);
+		} else {
+			printf("%s\n", bar_fail_str);
+		}
+	}
+}
+
+/**
+ * check if functions accept invalid parameters
+ *
+ * First, all functions will be called without initialized PIE
+ * Then, all of them will be called with NULL/invalid parameters
+ *
+ * Some functions are not tested as they are performance-critical and thus
+ * don't do any parameter checking.
+ */
+static int
+test_invalid_parameters(void)
+{
+	struct rte_pie_config config;
+	static const char *shf_str = "rte_pie_config_init should have failed!";
+	static const char *shf_rt_str = "rte_pie_rt_data_init should have failed!";
+
+	/* NULL config */
+	if (rte_pie_rt_data_init(NULL) == 0) {
+		printf("%i: %s\n", __LINE__, shf_rt_str);
+		return -1;
+	}
+
+	/* NULL config */
+	if (rte_pie_config_init(NULL, 0, 0, 0, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* qdelay_ref <= 0 */
+	if (rte_pie_config_init(&config, 0, 1, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* dp_update_interval <= 0 */
+	if (rte_pie_config_init(&config, 1, 0, 1, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* max_burst <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 0, 1) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	/* tailq_th <= 0 */
+	if (rte_pie_config_init(&config, 1, 1, 1, 0) == 0) {
+		printf("%i%s\n", __LINE__, shf_str);
+		return -1;
+	}
+
+	RTE_SET_USED(config);
+
+	return 0;
+}
+
+static void
+show_stats(const uint32_t num_tests, const uint32_t num_pass)
+{
+	if (num_pass == num_tests)
+		printf("[total: %u, pass: %u]\n", num_tests, num_pass);
+	else
+		printf("[total: %u, pass: %u, fail: %u]\n", num_tests, num_pass,
+		       num_tests - num_pass);
+}
+
+static int
+tell_the_result(const uint32_t num_tests, const uint32_t num_pass)
+{
+	return (num_pass == num_tests) ? 0 : 1;
+}
+
+static int
+test_pie(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests_quick, RTE_DIM(func_pie_tests_quick),
+		  &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_perf(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+static int
+test_pie_all(void)
+{
+	uint32_t num_tests = 0;
+	uint32_t num_pass = 0;
+
+	if (test_invalid_parameters() < 0)
+		return -1;
+
+	run_tests(func_pie_tests, RTE_DIM(func_pie_tests), &num_tests, &num_pass);
+	run_tests(perf_pie_tests, RTE_DIM(perf_pie_tests), &num_tests, &num_pass);
+	show_stats(num_tests, num_pass);
+	return tell_the_result(num_tests, num_pass);
+}
+
+REGISTER_TEST_COMMAND(pie_autotest, test_pie);
+REGISTER_TEST_COMMAND(pie_perf, test_pie_perf);
+REGISTER_TEST_COMMAND(pie_all, test_pie_all);
diff --git a/config/rte_config.h b/config/rte_config.h
index 1a66b42fcc..4ab1d86fdf 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -89,7 +89,7 @@
 #define RTE_MAX_LCORE_FREQS 64
 
 /* rte_sched defines */
-#undef RTE_SCHED_RED
+#undef RTE_SCHED_CMAN
 #undef RTE_SCHED_COLLECT_STATS
 #undef RTE_SCHED_SUBPORT_TC_OV
 #define RTE_SCHED_PORT_N_GRINDERS 8
diff --git a/doc/guides/prog_guide/glossary.rst b/doc/guides/prog_guide/glossary.rst
index 7044a7df2a..fb0910ba5b 100644
--- a/doc/guides/prog_guide/glossary.rst
+++ b/doc/guides/prog_guide/glossary.rst
@@ -158,6 +158,9 @@ PCI
 PHY
    An abbreviation for the physical layer of the OSI model.
 
+PIE
+   Proportional Integral Controller Enhanced (RFC8033)
+
 pktmbuf
    An *mbuf* carrying a network packet.
 
diff --git a/doc/guides/prog_guide/qos_framework.rst b/doc/guides/prog_guide/qos_framework.rst
index 3b8a1184b0..89ea199529 100644
--- a/doc/guides/prog_guide/qos_framework.rst
+++ b/doc/guides/prog_guide/qos_framework.rst
@@ -56,7 +56,8 @@ A functional description of each block is provided in the following table.
    |   |                        |                                                                                |
    +---+------------------------+--------------------------------------------------------------------------------+
    | 7 | Dropper                | Congestion management using the Random Early Detection (RED) algorithm         |
-   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED).    |
+   |   |                        | (specified by the Sally Floyd - Van Jacobson paper) or Weighted RED (WRED)     |
+   |   |                        | or Proportional Integral Controller Enhanced (PIE).                            |
    |   |                        | Drop packets based on the current scheduler queue load level and packet        |
    |   |                        | priority. When congestion is experienced, lower priority packets are dropped   |
    |   |                        | first.                                                                         |
@@ -421,7 +422,7 @@ No input packet can be part of more than one pipeline stage at a given time.
 The congestion management scheme implemented by the enqueue pipeline described above is very basic:
 packets are enqueued until a specific queue becomes full,
 then all the packets destined to the same queue are dropped until packets are consumed (by the dequeue operation).
-This can be improved by enabling RED/WRED as part of the enqueue pipeline which looks at the queue occupancy and
+This can be improved by enabling RED/WRED or PIE as part of the enqueue pipeline which looks at the queue occupancy and
 packet priority in order to yield the enqueue/drop decision for a specific packet
 (as opposed to enqueuing all packets / dropping all packets indiscriminately).
 
@@ -1155,13 +1156,13 @@ If the number of queues is small,
 then the performance of the port scheduler for the same level of active traffic is expected to be worse than
 the performance of a small set of message passing queues.
 
-.. _Dropper:
+.. _Droppers:
 
-Dropper
--------
+Droppers
+--------
 
 The purpose of the DPDK dropper is to drop packets arriving at a packet scheduler to avoid congestion.
-The dropper supports the Random Early Detection (RED),
+The dropper supports the Proportional Integral Controller Enhanced (PIE), Random Early Detection (RED),
 Weighted Random Early Detection (WRED) and tail drop algorithms.
 :numref:`figure_blk_diag_dropper` illustrates how the dropper integrates with the scheduler.
 The DPDK currently does not support congestion management
@@ -1174,9 +1175,13 @@ so the dropper provides the only method for congestion avoidance.
    High-level Block Diagram of the DPDK Dropper
 
 
-The dropper uses the Random Early Detection (RED) congestion avoidance algorithm as documented in the reference publication.
-The purpose of the RED algorithm is to monitor a packet queue,
+The dropper uses one of two congestion avoidance algorithms:
+   - the Random Early Detection (RED) as documented in the reference publication.
+   - the Proportional Integral Controller Enhanced (PIE) as documented in RFC8033 publication.
+
+The purpose of the RED/PIE algorithm is to monitor a packet queue,
 determine the current congestion level in the queue and decide whether an arriving packet should be enqueued or dropped.
+
 The RED algorithm uses an Exponential Weighted Moving Average (EWMA) filter to compute average queue size which
 gives an indication of the current congestion level in the queue.
 
@@ -1192,7 +1197,7 @@ This occurs when a packet queue has reached maximum capacity and cannot store an
 In this situation, all arriving packets are dropped.
 
 The flow through the dropper is illustrated in :numref:`figure_flow_tru_droppper`.
-The RED/WRED algorithm is exercised first and tail drop second.
+The RED/WRED/PIE algorithm is exercised first and tail drop second.
 
 .. _figure_flow_tru_droppper:
 
@@ -1200,6 +1205,16 @@ The RED/WRED algorithm is exercised first and tail drop second.
 
    Flow Through the Dropper
 
+The PIE algorithm periodically updates the drop probability based on the latency samples.
+It uses not only the current latency sample but also analyzes whether the latency is
+trending up or down.
+This is the classical Proportional Integral (PI) controller method, which is known for
+eliminating steady state errors.
+
+When a congestion period ends, we might be left with a high drop probability with light
+packet arrivals. Hence, the PIE algorithm includes a mechanism by which the drop probability
+decays exponentially (rather than linearly) when the system is not congested.
+This would help the drop probability converge to 0 more quickly, while the PI controller ensures
+that it would eventually reach zero.
 
 The use cases supported by the dropper are:
 
@@ -1253,6 +1268,35 @@ to a mark probability of 1/10 (that is, 1 in 10 packets will be dropped).
 The EWMA filter weight parameter is specified as an inverse log value,
 for example, a filter weight parameter value of 9 corresponds to a filter weight of 1/29.
 
+A PIE configuration contains the parameters given in :numref:`table_qos_16a`.
+
+.. _table_qos_16a:
+
+.. table:: PIE Configuration Parameters
+
+   +--------------------------+---------+---------+------------------+
+   | Parameter                | Minimum | Maximum | Default          |
+   |                          |         |         |                  |
+   +==========================+=========+=========+==================+
+   | Queue delay reference    | 1       | uint16  | 15               |
+   | Latency Target Value     |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Max Burst Allowance      | 1       | uint16  | 150              |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Tail Drop Threshold      | 1       | uint16  | 64               |
+   | Unit: packets            |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+   | Period to calculate      | 1       | uint16  | 15               |
+   | drop probability         |         |         |                  |
+   | Unit: ms                 |         |         |                  |
+   +--------------------------+---------+---------+------------------+
+
+The meaning of these parameters is explained in more detail in the next sections.
+The format of these parameters is the one specified to the dropper module API.
+Applications may also compute these values themselves for fine tuning.
+
 .. _Enqueue_Operation:
 
 Enqueue Operation
@@ -1396,7 +1440,7 @@ As can be seen, the floating-point implementation achieved the worst performance
    | Method                                                                             | Relative Performance |
    |                                                                                    |                      |
    +====================================================================================+======================+
-   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Dropper>`)                   | 100%                 |
+   | Current dropper method (see :ref:`Section 23.3.2.1.3 <Droppers>`)                  | 100%                 |
    |                                                                                    |                      |
    +------------------------------------------------------------------------------------+----------------------+
    | Fixed-point method with small (512B) look-up table                                 | 148%                 |
diff --git a/doc/guides/prog_guide/traffic_management.rst b/doc/guides/prog_guide/traffic_management.rst
index 05b34d93a5..c356791a45 100644
--- a/doc/guides/prog_guide/traffic_management.rst
+++ b/doc/guides/prog_guide/traffic_management.rst
@@ -22,6 +22,7 @@ Main features:
   shared (by multiple nodes) shapers
 * Congestion management for hierarchy leaf nodes: algorithms of tail drop, head
   drop, WRED, private (per node) and shared (by multiple nodes) WRED contexts
+  and PIE.
 * Packet marking: IEEE 802.1q (VLAN DEI), IETF RFC 3168 (IPv4/IPv6 ECN for TCP
   and SCTP), IETF RFC 2597 (IPv4 / IPv6 DSCP)
 
@@ -103,8 +104,9 @@ Congestion Management
 Congestion management is used to control the admission of packets into a packet
 queue or group of packet queues on congestion. The congestion management
 algorithms that are supported are: Tail Drop, Head Drop and Weighted Random
-Early Detection (WRED). They are made available for every leaf node in the
-hierarchy, subject to the specific implementation supporting them.
+Early Detection (WRED), Proportional Integral Controller Enhanced (PIE).
+They are made available for every leaf node in the hierarchy, subject to
+the specific implementation supporting them.
 On request of writing a new packet into the current queue while the queue is
 full, the Tail Drop algorithm drops the new packet while leaving the queue
 unmodified, as opposed to the Head Drop* algorithm, which drops the packet
@@ -128,6 +130,13 @@ The configuration of WRED private and shared contexts is done through the
 definition of WRED profiles. Any WRED profile can be used by one or several
 WRED contexts (either private or shared).
 
+The Proportional Integral Controller Enhanced (PIE) algorithm works by proactively
+dropping packets randomly. The calculated drop probability is updated periodically,
+based on the measured and desired latency and on whether the queuing latency is
+currently trending up or down. Queuing latency can be obtained by direct measurement
+or estimated from the queue length and dequeue rate. The random drop is triggered by
+a packet's arrival before enqueuing into a queue.
+
 
 Packet Marking
 --------------
diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c
index 90baba15ce..e74092ce7f 100644
--- a/drivers/net/softnic/rte_eth_softnic_tm.c
+++ b/drivers/net/softnic/rte_eth_softnic_tm.c
@@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev,
 	return 0;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 #define WRED_SUPPORTED						1
 #else
 #define WRED_SUPPORTED						0
@@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id)
 	return NULL;
 }
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static void
 wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
@@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id)
 	for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++)
 		for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) {
 			struct rte_red_params *dst =
-				&pp->red_params[tc_id][color];
+				&pp->cman_params->red_params[tc_id][color];
 			struct tm_wred_profile *src_wp =
 				tm_tc_wred_profile_get(dev, tc_id);
 			struct rte_tm_red_params *src =
diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index e4e364cbc0..406184e760 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -25,7 +25,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.pipe_profiles = pipe_profile,
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 .red_params = {
 	/* Traffic Class 0 Colors Green / Yellow / Red */
 	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -92,7 +92,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 };
 
 static struct tmgr_port_list tmgr_port_list;
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index cd167bd8e6..4bef887099 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -242,7 +242,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	memset(active_queues, 0, sizeof(active_queues));
 	n_active_queues = 0;
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 	char sec_name[CFG_NAME_LEN];
 	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
 
@@ -315,7 +315,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
 		char sec_name[CFG_NAME_LEN];
@@ -393,7 +393,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 					}
 				}
 			}
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
 				for (k = 0; k < RTE_COLORS; k++) {
 					subport_params[i].red_params[j][k].min_th =
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 9b34e4a76b..3bdc653c69 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -211,7 +211,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		.n_pipe_profiles = sizeof(pipe_profiles) /
 			sizeof(struct rte_sched_pipe_params),
 		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -278,7 +278,7 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
-#endif /* RTE_SCHED_RED */
+#endif /* RTE_SCHED_CMAN */
 	},
 };
 
diff --git a/lib/sched/meson.build b/lib/sched/meson.build
index 8ced4547aa..df75db51ed 100644
--- a/lib/sched/meson.build
+++ b/lib/sched/meson.build
@@ -7,11 +7,12 @@ if is_windows
     subdir_done()
 endif
 
-sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c')
+sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c')
 headers = files(
         'rte_approx.h',
         'rte_red.h',
         'rte_sched.h',
         'rte_sched_common.h',
+        'rte_pie.h',
 )
 deps += ['mbuf', 'meter']
diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..934e9aee50
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include <stdlib.h>
+
+#include "rte_pie.h"
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+int
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		/* Allocate memory to use the PIE data structure */
+		pie = rte_malloc(NULL, sizeof(struct rte_pie), 0);
+
+		if (pie == NULL)
+			RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
+
+		return -1;
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+
+	return 0;
+}
+
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -1;
+
+	if (qdelay_ref <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th <= 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..a3441ffe51
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,396 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * Proportional Integral controller Enhanced (PIE)
+ **/
+
+#include <stdint.h>
+
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by the user.
+ * Time values are in milliseconds; rte_pie_config_init() converts them
+ * to CPU cycles when building struct rte_pie_config.
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * Internal PIE configuration.
+ * Same parameters as struct rte_pie_params, with the time values
+ * pre-converted from milliseconds to CPU cycles.
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data, one instance per scheduler queue.
+ * Updated on every enqueue/dequeue; initialised by rte_pie_rt_data_init().
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (in cpu cycles; assigned from
+					 * 64-bit max_burst — NOTE(review): confirm no truncation) */
+	uint64_t qdelay_old;           /**< Previous estimated queue delay (in cpu cycles,
+					 * derived from qlen * avg_dq_time; not bytes) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != NULL);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Note: can be implemented using integer multiply.
+	 * DQ_THRESHOLD is power of 2 value.
+	 */
+	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
+
+	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	uint64_t rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	rand_value = rte_rand()/RTE_RAND_MAX;
+
+	if ((double)rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if a new packet should be enqueued or dropped for a non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Tail drop: the queue already holds tailq_th packets */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability once per dp_update_interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Random drop, suppressed while burst allowance remains */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy rises above 10% of the tail-drop threshold,
+	 * turn on PIE, reset the controller state and start a measurement
+	 * cycle with a full burst allowance
+	 */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* When the queue has drained below 10% of the tail-drop threshold,
+	 * turn off PIE and stop the measurement cycle
+	 */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Account the enqueued packet in the queue length counters */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if a new packet should be enqueued or dropped
+ * Updates run time data and gives the verdict whether to enqueue or drop
+ * the packet, dispatching to the empty-queue or non-empty-queue handler
+ * based on the caller-supplied queue length.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on the tail-drop threshold
+ * @retval 2 drop the packet based on the drop probability criteria
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure; maintains the EWMA of the dequeue
+ * time used by _calc_drop_probability() to estimate queue delay.
+ *
+ * NOTE(review): qlen/qlen_bytes are not decremented here — confirm the
+ * caller updates them when a packet leaves the queue.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Enough bytes departed: close the measurement cycle and
+		 * fold the elapsed time into the EWMA of the dequeue time
+		 */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c
index a858f61f95..2fe32bbd33 100644
--- a/lib/sched/rte_sched.c
+++ b/lib/sched/rte_sched.c
@@ -89,8 +89,12 @@ struct rte_sched_queue {
 
 struct rte_sched_queue_extra {
 	struct rte_sched_queue_stats stats;
-#ifdef RTE_SCHED_RED
-	struct rte_red red;
+#ifdef RTE_SCHED_CMAN
+	RTE_STD_C11
+	union {
+		struct rte_red red;
+		struct rte_pie pie;
+	};
 #endif
 };
 
@@ -183,8 +187,15 @@ struct rte_sched_subport {
 	/* Pipe queues size */
 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+#ifdef RTE_SCHED_CMAN
+	bool cman_enabled;
+	enum rte_sched_cman_mode cman;
+
+	RTE_STD_C11
+	union {
+		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
 #endif
 
 	/* Scheduling loop detection */
@@ -1078,6 +1089,90 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
 	rte_free(port);
 }
 
+#ifdef RTE_SCHED_CMAN
+/* Configure the per-(traffic class, color) WRED contexts of a subport
+ * from params->cman_params->red_params.  Entries whose min_th and
+ * max_th are both zero leave RED disabled for that pair.  On failure
+ * the subport memory is released and -EINVAL returned; on success the
+ * subport CMAN mode is set to RED.
+ */
+static int
+rte_sched_red_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+
+		uint32_t j;
+
+		for (j = 0; j < RTE_COLORS; j++) {
+			/* if min/max are both zero, then RED is disabled */
+			if ((params->cman_params->red_params[i][j].min_th |
+				 params->cman_params->red_params[i][j].max_th) == 0) {
+				continue;
+			}
+
+			if (rte_red_config_init(&s->red_config[i][j],
+				params->cman_params->red_params[i][j].wq_log2,
+				params->cman_params->red_params[i][j].min_th,
+				params->cman_params->red_params[i][j].max_th,
+				params->cman_params->red_params[i][j].maxp_inv) != 0) {
+				rte_sched_free_memory(port, n_subports);
+
+				RTE_LOG(NOTICE, SCHED,
+				"%s: RED configuration init fails\n", __func__);
+				return -EINVAL;
+			}
+		}
+	}
+	s->cman = RTE_SCHED_CMAN_RED;
+	return 0;
+}
+
+static int
+rte_sched_pie_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	uint32_t i;
+
+	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE tailq threshold incorrect\n", __func__);
+			return -EINVAL;
+		}
+
+		if (rte_pie_config_init(&s->pie_config[i],
+			params->cman_params->pie_params[i].qdelay_ref,
+			params->cman_params->pie_params[i].dp_update_interval,
+			params->cman_params->pie_params[i].max_burst,
+			params->cman_params->pie_params[i].tailq_th) != 0) {
+			rte_sched_free_memory(port, n_subports);
+
+			RTE_LOG(NOTICE, SCHED,
+			"%s: PIE configuration init fails\n", __func__);
+			return -EINVAL;
+			}
+	}
+	s->cman = RTE_SCHED_CMAN_PIE;
+	return 0;
+}
+
+static int
+rte_sched_cman_config(struct rte_sched_port *port,
+	struct rte_sched_subport *s,
+	struct rte_sched_subport_params *params,
+	uint32_t n_subports)
+{
+	if (params->cman_params->cman_mode == RTE_SCHED_CMAN_RED)
+		return rte_sched_red_config(port, s, params, n_subports);
+
+	else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
+		return rte_sched_pie_config(port, s, params, n_subports);
+
+	return -EINVAL;
+}
+#endif
+
 int
 rte_sched_subport_config(struct rte_sched_port *port,
 	uint32_t subport_id,
@@ -1167,29 +1262,17 @@ rte_sched_subport_config(struct rte_sched_port *port,
 		s->n_pipe_profiles = params->n_pipe_profiles;
 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
-			uint32_t j;
-
-			for (j = 0; j < RTE_COLORS; j++) {
-			/* if min/max are both zero, then RED is disabled */
-				if ((params->red_params[i][j].min_th |
-				     params->red_params[i][j].max_th) == 0) {
-					continue;
-				}
-
-				if (rte_red_config_init(&s->red_config[i][j],
-				    params->red_params[i][j].wq_log2,
-				    params->red_params[i][j].min_th,
-				    params->red_params[i][j].max_th,
-				    params->red_params[i][j].maxp_inv) != 0) {
-					RTE_LOG(NOTICE, SCHED,
-					"%s: RED configuration init fails\n",
-					__func__);
-					ret = -EINVAL;
-					goto out;
-				}
+#ifdef RTE_SCHED_CMAN
+		if (params->cman_params != NULL) {
+			s->cman_enabled = true;
+			status = rte_sched_cman_config(port, s, params, n_subports);
+			if (status) {
+				RTE_LOG(NOTICE, SCHED,
+					"%s: CMAN configuration fails\n", __func__);
+				return status;
 			}
+		} else {
+			s->cman_enabled = false;
 		}
 #endif
 
@@ -1718,30 +1801,19 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port,
 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
-	struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
 	uint32_t pkt_len = pkt->pkt_len;
 
 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
-#ifdef RTE_SCHED_RED
-	subport->stats.n_pkts_red_dropped[tc_index] += red;
-#endif
+	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
 }
 
 static inline void
@@ -1756,73 +1828,99 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
 	qe->stats.n_bytes += pkt_len;
 }
 
-#ifdef RTE_SCHED_RED
 static inline void
 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
 	uint32_t qindex,
 	struct rte_mbuf *pkt,
-	uint32_t red)
-#else
-static inline void
-rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
-	uint32_t qindex,
-	struct rte_mbuf *pkt,
-	__rte_unused uint32_t red)
-#endif
+	__rte_unused uint32_t n_pkts_cman_dropped)
 {
 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
 	uint32_t pkt_len = pkt->pkt_len;
 
 	qe->stats.n_pkts_dropped += 1;
 	qe->stats.n_bytes_dropped += pkt_len;
-#ifdef RTE_SCHED_RED
-	qe->stats.n_pkts_red_dropped += red;
+#ifdef RTE_SCHED_CMAN
+	if (subport->cman_enabled)
+		qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
 #endif
 }
 
 #endif /* RTE_SCHED_COLLECT_STATS */
 
-#ifdef RTE_SCHED_RED
+#ifdef RTE_SCHED_CMAN
 
 static inline int
-rte_sched_port_red_drop(struct rte_sched_port *port,
+rte_sched_port_cman_drop(struct rte_sched_port *port,
 	struct rte_sched_subport *subport,
 	struct rte_mbuf *pkt,
 	uint32_t qindex,
 	uint16_t qlen)
 {
+	if (!subport->cman_enabled)
+		return 0;
+
 	struct rte_sched_queue_extra *qe;
-	struct rte_red_config *red_cfg;
-	struct rte_red *red;
 	uint32_t tc_index;
-	enum rte_color color;
 
 	tc_index = rte_sched_port_pipe_tc(port, qindex);
-	color = rte_sched_port_pkt_read_color(pkt);
-	red_cfg = &subport->red_config[tc_index][color];
-
-	if ((red_cfg->min_th | red_cfg->max_th) == 0)
-		return 0;
-
 	qe = subport->queue_extra + qindex;
-	red = &qe->red;
 
-	return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	/* RED */
+	if (subport->cman == RTE_SCHED_CMAN_RED) {
+		struct rte_red_config *red_cfg;
+		struct rte_red *red;
+		enum rte_color color;
+
+		color = rte_sched_port_pkt_read_color(pkt);
+		red_cfg = &subport->red_config[tc_index][color];
+
+		if ((red_cfg->min_th | red_cfg->max_th) == 0)
+			return 0;
+
+		red = &qe->red;
+
+		return rte_red_enqueue(red_cfg, red, qlen, port->time);
+	}
+
+	/* PIE */
+	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
+	struct rte_pie *pie = &qe->pie;
+
+	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
 }
 
 static inline void
-rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
+rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
 	struct rte_sched_subport *subport, uint32_t qindex)
 {
-	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
-	struct rte_red *red = &qe->red;
+	if (subport->cman_enabled) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		if (subport->cman == RTE_SCHED_CMAN_RED) {
+			struct rte_red *red = &qe->red;
 
-	rte_red_mark_queue_empty(red, port->time);
+			rte_red_mark_queue_empty(red, port->time);
+		}
+	}
+}
+
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
+uint32_t qindex, uint32_t pkt_len, uint64_t time) {
+	if (subport->cman_enabled && subport->cman == RTE_SCHED_CMAN_PIE) {
+		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
+		struct rte_pie *pie = &qe->pie;
+
+		/* Update queue length */
+		pie->qlen -= 1;
+		pie->qlen_bytes -= pkt_len;
+
+		rte_pie_dequeue(pie, pkt_len, time);
+	}
 }
 
 #else
 
-static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
+static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
 	struct rte_sched_subport *subport __rte_unused,
 	struct rte_mbuf *pkt __rte_unused,
 	uint32_t qindex __rte_unused,
@@ -1831,9 +1929,17 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus
 	return 0;
 }
 
-#define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
+#define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex)
 
-#endif /* RTE_SCHED_RED */
+static inline void
+rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
+	uint32_t qindex __rte_unused,
+	uint32_t pkt_len __rte_unused,
+	uint64_t time __rte_unused) {
+	/* do-nothing when RTE_SCHED_CMAN not defined */
+}
+
+#endif /* RTE_SCHED_CMAN */
 
 #ifdef RTE_SCHED_DEBUG
 
@@ -1929,7 +2035,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
 	qlen = q->qw - q->qr;
 
 	/* Drop the packet (and update drop stats) when queue is full */
-	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
+	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
 		     (qlen >= qsize))) {
 		rte_pktmbuf_free(pkt);
 #ifdef RTE_SCHED_COLLECT_STATS
@@ -2402,6 +2508,7 @@ grinder_schedule(struct rte_sched_port *port,
 {
 	struct rte_sched_grinder *grinder = subport->grinder + pos;
 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
+	uint32_t qindex = grinder->qindex[grinder->qpos];
 	struct rte_mbuf *pkt = grinder->pkt;
 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
 	uint32_t be_tc_active;
@@ -2421,15 +2528,16 @@ grinder_schedule(struct rte_sched_port *port,
 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
 
 	if (queue->qr == queue->qw) {
-		uint32_t qindex = grinder->qindex[grinder->qpos];
-
 		rte_bitmap_clear(subport->bmp, qindex);
 		grinder->qmask &= ~(1 << grinder->qpos);
 		if (be_tc_active)
 			grinder->wrr_mask[grinder->qpos] = 0;
-		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
+
+		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
 	}
 
+	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
+
 	/* Reset pipe loop detection */
 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
 	grinder->productive = 1;
diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h
index cb851301e9..484dbdcc3d 100644
--- a/lib/sched/rte_sched.h
+++ b/lib/sched/rte_sched.h
@@ -61,10 +61,9 @@ extern "C" {
 #include <rte_mbuf.h>
 #include <rte_meter.h>
 
-/** Random Early Detection (RED) */
-#ifdef RTE_SCHED_RED
+/** Congestion Management */
 #include "rte_red.h"
-#endif
+#include "rte_pie.h"
 
 /** Maximum number of queues per pipe.
  * Note that the multiple queues (power of 2) can only be assigned to
@@ -110,6 +109,28 @@ extern "C" {
 #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
 #endif
 
+/**
+ * Congestion Management (CMAN) mode
+ *
+ * This is used for controlling the admission of packets into a packet queue or
+ * group of packet queues on congestion.
+ *
+ * The *Random Early Detection (RED)* algorithm works by proactively dropping
+ * more and more input packets as the queue occupancy builds up. When the queue
+ * is full or almost full, RED effectively works as *tail drop*. The *Weighted
+ * RED* algorithm uses a separate set of RED thresholds for each packet color.
+ *
+ * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
+ * drops a packet at the onset of the congestion and tries to control the
+ * latency around the target value. The congestion detection, however, is based
+ * on the queueing latency instead of the queue length like RED. For more
+ * information, refer RFC8033.
+ */
+enum rte_sched_cman_mode {
+	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
+	RTE_SCHED_CMAN_PIE, /**< Proportional Integral Controller Enhanced (PIE) */
+};
+
 /*
  * Pipe configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -139,6 +160,22 @@ struct rte_sched_pipe_params {
 	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
 };
 
+/*
+ * Congestion Management configuration parameters.
+ */
+struct rte_sched_cman_params {
+	/** Congestion Management mode */
+	enum rte_sched_cman_mode cman_mode;
+
+	union {
+		/** RED parameters */
+		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+
+		/** PIE parameters */
+		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+	};
+};
+
 /*
  * Subport configuration parameters. The period and credits_per_period
  * parameters are measured in bytes, with one byte meaning the time
@@ -174,10 +211,11 @@ struct rte_sched_subport_params {
 	/** Max allowed profiles in the pipe profile table */
 	uint32_t n_max_pipe_profiles;
 
-#ifdef RTE_SCHED_RED
-	/** RED parameters */
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
-#endif
+	/** Congestion Management parameters
+	 * If NULL the congestion management is disabled for the subport,
+	 * otherwise proper parameters need to be provided.
+	 */
+	struct rte_sched_cman_params *cman_params;
 };
 
 struct rte_sched_subport_profile_params {
@@ -208,10 +246,8 @@ struct rte_sched_subport_stats {
 	/** Number of bytes dropped for each traffic class */
 	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 
-#ifdef RTE_SCHED_RED
-	/** Number of packets dropped by red */
-	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
-#endif
+	/** Number of packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 };
 
 /** Queue statistics */
@@ -222,10 +258,8 @@ struct rte_sched_queue_stats {
 	/** Packets dropped */
 	uint64_t n_pkts_dropped;
 
-#ifdef RTE_SCHED_RED
-	/** Packets dropped by RED */
-	uint64_t n_pkts_red_dropped;
-#endif
+	/** Packets dropped by congestion management scheme */
+	uint64_t n_pkts_cman_dropped;
 
 	/** Bytes successfully written */
 	uint64_t n_bytes;
diff --git a/lib/sched/version.map b/lib/sched/version.map
index a6e505c8ac..d22c07fc9f 100644
--- a/lib/sched/version.map
+++ b/lib/sched/version.map
@@ -30,4 +30,8 @@ EXPERIMENTAL {
 
 	# added in 20.11
 	rte_sched_port_subport_profile_add;
+
+	# added in 21.11
+	rte_pie_rt_data_init;
+	rte_pie_config_init;
 };
-- 
2.33.0


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v24 2/3] examples/qos_sched: support PIE congestion management
  2021-11-04 14:55                                             ` [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library Thomas Monjalon
  2021-11-04 14:55                                               ` [dpdk-dev] [PATCH v24 1/3] sched: add PIE based congestion management Thomas Monjalon
@ 2021-11-04 14:55                                               ` Thomas Monjalon
  2021-11-04 14:55                                               ` [dpdk-dev] [PATCH v24 3/3] examples/ip_pipeline: " Thomas Monjalon
  2021-11-04 15:07                                               ` [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library Thomas Monjalon
  3 siblings, 0 replies; 178+ messages in thread
From: Thomas Monjalon @ 2021-11-04 14:55 UTC (permalink / raw)
  To: dev
  Cc: megha.ajmera, Wojciech Liguzinski, Cristian Dumitrescu, Jasvinder Singh

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

This patch adds support for enabling either PIE or RED by
parsing the configuration file.

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 examples/qos_sched/cfg_file.c  | 121 +++++++++++++++-----
 examples/qos_sched/cfg_file.h  |   5 +
 examples/qos_sched/init.c      |  23 ++--
 examples/qos_sched/main.h      |   3 +
 examples/qos_sched/profile.cfg | 196 ++++++++++++++++++++++-----------
 5 files changed, 245 insertions(+), 103 deletions(-)

diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
index 4bef887099..450482f07d 100644
--- a/examples/qos_sched/cfg_file.c
+++ b/examples/qos_sched/cfg_file.c
@@ -229,6 +229,40 @@ cfg_load_subport_profile(struct rte_cfgfile *cfg,
 	return 0;
 }
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p)
+{
+	int j, k;
+	subport_p->cman_params->cman_mode = cman_p.cman_mode;
+
+	for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+		if (subport_p->cman_params->cman_mode ==
+					RTE_SCHED_CMAN_RED) {
+			for (k = 0; k < RTE_COLORS; k++) {
+				subport_p->cman_params->red_params[j][k].min_th =
+					cman_p.red_params[j][k].min_th;
+				subport_p->cman_params->red_params[j][k].max_th =
+					cman_p.red_params[j][k].max_th;
+				subport_p->cman_params->red_params[j][k].maxp_inv =
+					cman_p.red_params[j][k].maxp_inv;
+				subport_p->cman_params->red_params[j][k].wq_log2 =
+					cman_p.red_params[j][k].wq_log2;
+			}
+		} else {
+			subport_p->cman_params->pie_params[j].qdelay_ref =
+				cman_p.pie_params[j].qdelay_ref;
+			subport_p->cman_params->pie_params[j].dp_update_interval =
+				cman_p.pie_params[j].dp_update_interval;
+			subport_p->cman_params->pie_params[j].max_burst =
+				cman_p.pie_params[j].max_burst;
+			subport_p->cman_params->pie_params[j].tailq_th =
+				cman_p.pie_params[j].tailq_th;
+		}
+	}
+}
+#endif
+
 int
 cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
 {
@@ -243,24 +277,25 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 	n_active_queues = 0;
 
 #ifdef RTE_SCHED_CMAN
-	char sec_name[CFG_NAME_LEN];
-	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
+	struct rte_sched_cman_params cman_params = {
+		.cman_mode = RTE_SCHED_CMAN_RED,
+		.red_params = { },
+	};
 
-	snprintf(sec_name, sizeof(sec_name), "red");
-
-	if (rte_cfgfile_has_section(cfg, sec_name)) {
+	if (rte_cfgfile_has_section(cfg, "red")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_RED;
 
 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
 			char str[32];
 
-			/* Parse WRED min thresholds */
-			snprintf(str, sizeof(str), "tc %d wred min", i);
-			entry = rte_cfgfile_get_entry(cfg, sec_name, str);
+			/* Parse RED min thresholds */
+			snprintf(str, sizeof(str), "tc %d red min", i);
+			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].min_th
+					cman_params.red_params[i][j].min_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -268,14 +303,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED max thresholds */
-			snprintf(str, sizeof(str), "tc %d wred max", i);
+			/* Parse RED max thresholds */
+			snprintf(str, sizeof(str), "tc %d red max", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].max_th
+					cman_params.red_params[i][j].max_th
 						= (uint16_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -283,14 +318,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED inverse mark probabilities */
-			snprintf(str, sizeof(str), "tc %d wred inv prob", i);
+			/* Parse RED inverse mark probabilities */
+			snprintf(str, sizeof(str), "tc %d red inv prob", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].maxp_inv
+					cman_params.red_params[i][j].maxp_inv
 						= (uint8_t)strtol(entry, &next, 10);
 
 					if (next == NULL)
@@ -299,14 +334,14 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 
-			/* Parse WRED EWMA filter weights */
-			snprintf(str, sizeof(str), "tc %d wred weight", i);
+			/* Parse RED EWMA filter weights */
+			snprintf(str, sizeof(str), "tc %d red weight", i);
 			entry = rte_cfgfile_get_entry(cfg, "red", str);
 			if (entry) {
 				char *next;
 				/* for each packet colour (green, yellow, red) */
 				for (j = 0; j < RTE_COLORS; j++) {
-					red_params[i][j].wq_log2
+					cman_params.red_params[i][j].wq_log2
 						= (uint8_t)strtol(entry, &next, 10);
 					if (next == NULL)
 						break;
@@ -315,6 +350,43 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 			}
 		}
 	}
+
+	if (rte_cfgfile_has_section(cfg, "pie")) {
+		cman_params.cman_mode = RTE_SCHED_CMAN_PIE;
+
+		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+			char str[32];
+
+			/* Parse Queue Delay Ref value */
+			snprintf(str, sizeof(str), "tc %d qdelay ref", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].qdelay_ref =
+					(uint16_t) atoi(entry);
+
+			/* Parse Max Burst value */
+			snprintf(str, sizeof(str), "tc %d max burst", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].max_burst =
+					(uint16_t) atoi(entry);
+
+			/* Parse Update Interval Value */
+			snprintf(str, sizeof(str), "tc %d update interval", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].dp_update_interval =
+					(uint16_t) atoi(entry);
+
+			/* Parse Tailq Threshold Value */
+			snprintf(str, sizeof(str), "tc %d tailq th", i);
+			entry = rte_cfgfile_get_entry(cfg, "pie", str);
+			if (entry)
+				cman_params.pie_params[i].tailq_th =
+					(uint16_t) atoi(entry);
+
+		}
+	}
 #endif /* RTE_SCHED_CMAN */
 
 	for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
@@ -394,18 +466,7 @@ cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subpo
 				}
 			}
 #ifdef RTE_SCHED_CMAN
-			for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
-				for (k = 0; k < RTE_COLORS; k++) {
-					subport_params[i].red_params[j][k].min_th =
-						red_params[j][k].min_th;
-					subport_params[i].red_params[j][k].max_th =
-						red_params[j][k].max_th;
-					subport_params[i].red_params[j][k].maxp_inv =
-						red_params[j][k].maxp_inv;
-					subport_params[i].red_params[j][k].wq_log2 =
-						red_params[j][k].wq_log2;
-				}
-			}
+			set_subport_cman_params(subport_params+i, cman_params);
 #endif
 		}
 	}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
index 0dc458aa71..1a9dce9db5 100644
--- a/examples/qos_sched/cfg_file.h
+++ b/examples/qos_sched/cfg_file.h
@@ -12,6 +12,11 @@ int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
 
 int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
 
+#ifdef RTE_SCHED_CMAN
+void set_subport_cman_params(struct rte_sched_subport_params *subport_p,
+					struct rte_sched_cman_params cman_p);
+#endif
+
 int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
 
 int cfg_load_subport_profile(struct rte_cfgfile *cfg,
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
index 3bdc653c69..3c1f0bc680 100644
--- a/examples/qos_sched/init.c
+++ b/examples/qos_sched/init.c
@@ -203,15 +203,9 @@ static struct rte_sched_subport_profile_params
 	},
 };
 
-struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
-	{
-		.n_pipes_per_subport_enabled = 4096,
-		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
-		.pipe_profiles = pipe_profiles,
-		.n_pipe_profiles = sizeof(pipe_profiles) /
-			sizeof(struct rte_sched_pipe_params),
-		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
 #ifdef RTE_SCHED_CMAN
+struct rte_sched_cman_params cman_params = {
+	.cman_mode = RTE_SCHED_CMAN_RED,
 	.red_params = {
 		/* Traffic Class 0 Colors Green / Yellow / Red */
 		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
@@ -278,6 +272,19 @@ struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
 		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
 	},
+};
+#endif /* RTE_SCHED_CMAN */
+
+struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+	{
+		.n_pipes_per_subport_enabled = 4096,
+		.qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+		.pipe_profiles = pipe_profiles,
+		.n_pipe_profiles = sizeof(pipe_profiles) /
+			sizeof(struct rte_sched_pipe_params),
+		.n_max_pipe_profiles = MAX_SCHED_PIPE_PROFILES,
+#ifdef RTE_SCHED_CMAN
+		.cman_params = &cman_params,
 #endif /* RTE_SCHED_CMAN */
 	},
 };
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 0d6815ae69..915311bac8 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -153,6 +153,9 @@ extern uint32_t active_queues[RTE_SCHED_QUEUES_PER_PIPE];
 extern uint32_t n_active_queues;
 
 extern struct rte_sched_port_params port_params;
+#ifdef RTE_SCHED_CMAN
+extern struct rte_sched_cman_params cman_params;
+#endif
 extern struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS];
 
 int app_parse_args(int argc, char **argv);
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
index 4486d2799e..d4b21c0170 100644
--- a/examples/qos_sched/profile.cfg
+++ b/examples/qos_sched/profile.cfg
@@ -76,68 +76,134 @@ tc 12 oversubscription weight = 1
 tc 12 wrr weights = 1 1 1 1
 
 ; RED params per traffic class and color (Green / Yellow / Red)
-[red]
-tc 0 wred min = 48 40 32
-tc 0 wred max = 64 64 64
-tc 0 wred inv prob = 10 10 10
-tc 0 wred weight = 9 9 9
-
-tc 1 wred min = 48 40 32
-tc 1 wred max = 64 64 64
-tc 1 wred inv prob = 10 10 10
-tc 1 wred weight = 9 9 9
-
-tc 2 wred min = 48 40 32
-tc 2 wred max = 64 64 64
-tc 2 wred inv prob = 10 10 10
-tc 2 wred weight = 9 9 9
-
-tc 3 wred min = 48 40 32
-tc 3 wred max = 64 64 64
-tc 3 wred inv prob = 10 10 10
-tc 3 wred weight = 9 9 9
-
-tc 4 wred min = 48 40 32
-tc 4 wred max = 64 64 64
-tc 4 wred inv prob = 10 10 10
-tc 4 wred weight = 9 9 9
-
-tc 5 wred min = 48 40 32
-tc 5 wred max = 64 64 64
-tc 5 wred inv prob = 10 10 10
-tc 5 wred weight = 9 9 9
-
-tc 6 wred min = 48 40 32
-tc 6 wred max = 64 64 64
-tc 6 wred inv prob = 10 10 10
-tc 6 wred weight = 9 9 9
-
-tc 7 wred min = 48 40 32
-tc 7 wred max = 64 64 64
-tc 7 wred inv prob = 10 10 10
-tc 7 wred weight = 9 9 9
-
-tc 8 wred min = 48 40 32
-tc 8 wred max = 64 64 64
-tc 8 wred inv prob = 10 10 10
-tc 8 wred weight = 9 9 9
-
-tc 9 wred min = 48 40 32
-tc 9 wred max = 64 64 64
-tc 9 wred inv prob = 10 10 10
-tc 9 wred weight = 9 9 9
-
-tc 10 wred min = 48 40 32
-tc 10 wred max = 64 64 64
-tc 10 wred inv prob = 10 10 10
-tc 10 wred weight = 9 9 9
-
-tc 11 wred min = 48 40 32
-tc 11 wred max = 64 64 64
-tc 11 wred inv prob = 10 10 10
-tc 11 wred weight = 9 9 9
-
-tc 12 wred min = 48 40 32
-tc 12 wred max = 64 64 64
-tc 12 wred inv prob = 10 10 10
-tc 12 wred weight = 9 9 9
+;[red]
+;tc 0 wred min = 48 40 32
+;tc 0 wred max = 64 64 64
+;tc 0 wred inv prob = 10 10 10
+;tc 0 wred weight = 9 9 9
+
+;tc 1 wred min = 48 40 32
+;tc 1 wred max = 64 64 64
+;tc 1 wred inv prob = 10 10 10
+;tc 1 wred weight = 9 9 9
+
+;tc 2 wred min = 48 40 32
+;tc 2 wred max = 64 64 64
+;tc 2 wred inv prob = 10 10 10
+;tc 2 wred weight = 9 9 9
+
+;tc 3 wred min = 48 40 32
+;tc 3 wred max = 64 64 64
+;tc 3 wred inv prob = 10 10 10
+;tc 3 wred weight = 9 9 9
+
+;tc 4 wred min = 48 40 32
+;tc 4 wred max = 64 64 64
+;tc 4 wred inv prob = 10 10 10
+;tc 4 wred weight = 9 9 9
+
+;tc 5 wred min = 48 40 32
+;tc 5 wred max = 64 64 64
+;tc 5 wred inv prob = 10 10 10
+;tc 5 wred weight = 9 9 9
+
+;tc 6 wred min = 48 40 32
+;tc 6 wred max = 64 64 64
+;tc 6 wred inv prob = 10 10 10
+;tc 6 wred weight = 9 9 9
+
+;tc 7 wred min = 48 40 32
+;tc 7 wred max = 64 64 64
+;tc 7 wred inv prob = 10 10 10
+;tc 7 wred weight = 9 9 9
+
+;tc 8 wred min = 48 40 32
+;tc 8 wred max = 64 64 64
+;tc 8 wred inv prob = 10 10 10
+;tc 8 wred weight = 9 9 9
+
+;tc 9 wred min = 48 40 32
+;tc 9 wred max = 64 64 64
+;tc 9 wred inv prob = 10 10 10
+;tc 9 wred weight = 9 9 9
+
+;tc 10 wred min = 48 40 32
+;tc 10 wred max = 64 64 64
+;tc 10 wred inv prob = 10 10 10
+;tc 10 wred weight = 9 9 9
+
+;tc 11 wred min = 48 40 32
+;tc 11 wred max = 64 64 64
+;tc 11 wred inv prob = 10 10 10
+;tc 11 wred weight = 9 9 9
+
+;tc 12 wred min = 48 40 32
+;tc 12 wred max = 64 64 64
+;tc 12 wred inv prob = 10 10 10
+;tc 12 wred weight = 9 9 9
+
+[pie]
+tc 0 qdelay ref = 15
+tc 0 max burst = 150
+tc 0 update interval = 15
+tc 0 tailq th = 64
+
+tc 1 qdelay ref = 15
+tc 1 max burst = 150
+tc 1 update interval = 15
+tc 1 tailq th = 64
+
+tc 2 qdelay ref = 15
+tc 2 max burst = 150
+tc 2 update interval = 15
+tc 2 tailq th = 64
+
+tc 3 qdelay ref = 15
+tc 3 max burst = 150
+tc 3 update interval = 15
+tc 3 tailq th = 64
+
+tc 4 qdelay ref = 15
+tc 4 max burst = 150
+tc 4 update interval = 15
+tc 4 tailq th = 64
+
+tc 5 qdelay ref = 15
+tc 5 max burst = 150
+tc 5 update interval = 15
+tc 5 tailq th = 64
+
+tc 6 qdelay ref = 15
+tc 6 max burst = 150
+tc 6 update interval = 15
+tc 6 tailq th = 64
+
+tc 7 qdelay ref = 15
+tc 7 max burst = 150
+tc 7 update interval = 15
+tc 7 tailq th = 64
+
+tc 8 qdelay ref = 15
+tc 8 max burst = 150
+tc 8 update interval = 15
+tc 8 tailq th = 64
+
+tc 9 qdelay ref = 15
+tc 9 max burst = 150
+tc 9 update interval = 15
+tc 9 tailq th = 64
+
+tc 10 qdelay ref = 15
+tc 10 max burst = 150
+tc 10 update interval = 15
+tc 10 tailq th = 64
+
+tc 11 qdelay ref = 15
+tc 11 max burst = 150
+tc 11 update interval = 15
+tc 11 tailq th = 64
+
+tc 12 qdelay ref = 15
+tc 12 max burst = 150
+tc 12 update interval = 15
+tc 12 tailq th = 64
-- 
2.33.0


^ permalink raw reply	[flat|nested] 178+ messages in thread

* [dpdk-dev] [PATCH v24 3/3] examples/ip_pipeline: support PIE congestion management
  2021-11-04 14:55                                             ` [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library Thomas Monjalon
  2021-11-04 14:55                                               ` [dpdk-dev] [PATCH v24 1/3] sched: add PIE based congestion management Thomas Monjalon
  2021-11-04 14:55                                               ` [dpdk-dev] [PATCH v24 2/3] examples/qos_sched: support PIE " Thomas Monjalon
@ 2021-11-04 14:55                                               ` Thomas Monjalon
  2021-11-04 15:07                                               ` [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library Thomas Monjalon
  3 siblings, 0 replies; 178+ messages in thread
From: Thomas Monjalon @ 2021-11-04 14:55 UTC (permalink / raw)
  To: dev
  Cc: megha.ajmera, Wojciech Liguzinski, Cristian Dumitrescu, Jasvinder Singh

From: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>

Adding the PIE support for IP Pipeline

Signed-off-by: Wojciech Liguzinski <wojciechx.liguzinski@intel.com>
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Acked-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 examples/ip_pipeline/tmgr.c | 138 +++++++++++++++++++-----------------
 1 file changed, 72 insertions(+), 66 deletions(-)

diff --git a/examples/ip_pipeline/tmgr.c b/examples/ip_pipeline/tmgr.c
index 406184e760..b138e885cf 100644
--- a/examples/ip_pipeline/tmgr.c
+++ b/examples/ip_pipeline/tmgr.c
@@ -17,6 +17,77 @@ static uint32_t n_subport_profiles;
 static struct rte_sched_pipe_params
 	pipe_profile[TMGR_PIPE_PROFILE_MAX];
 
+#ifdef RTE_SCHED_CMAN
+static struct rte_sched_cman_params cman_params = {
+	.red_params = {
+		/* Traffic Class 0 Colors Green / Yellow / Red */
+		[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 1 - Colors Green / Yellow / Red */
+		[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 2 - Colors Green / Yellow / Red */
+		[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 3 - Colors Green / Yellow / Red */
+		[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 4 - Colors Green / Yellow / Red */
+		[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 5 - Colors Green / Yellow / Red */
+		[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 6 - Colors Green / Yellow / Red */
+		[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 7 - Colors Green / Yellow / Red */
+		[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 8 - Colors Green / Yellow / Red */
+		[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 9 - Colors Green / Yellow / Red */
+		[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 10 - Colors Green / Yellow / Red */
+		[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 11 - Colors Green / Yellow / Red */
+		[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+		/* Traffic Class 12 - Colors Green / Yellow / Red */
+		[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+		},
+};
+#endif /* RTE_SCHED_CMAN */
+
 static uint32_t n_pipe_profiles;
 
 static const struct rte_sched_subport_params subport_params_default = {
@@ -26,72 +97,7 @@ static const struct rte_sched_subport_params subport_params_default = {
 	.n_pipe_profiles = 0, /* filled at run time */
 	.n_max_pipe_profiles = RTE_DIM(pipe_profile),
 #ifdef RTE_SCHED_CMAN
-.red_params = {
-	/* Traffic Class 0 Colors Green / Yellow / Red */
-	[0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 1 - Colors Green / Yellow / Red */
-	[1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 2 - Colors Green / Yellow / Red */
-	[2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 3 - Colors Green / Yellow / Red */
-	[3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 4 - Colors Green / Yellow / Red */
-	[4][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[4][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 5 - Colors Green / Yellow / Red */
-	[5][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[5][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 6 - Colors Green / Yellow / Red */
-	[6][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[6][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 7 - Colors Green / Yellow / Red */
-	[7][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[7][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 8 - Colors Green / Yellow / Red */
-	[8][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[8][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 9 - Colors Green / Yellow / Red */
-	[9][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[9][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 10 - Colors Green / Yellow / Red */
-	[10][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[10][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 11 - Colors Green / Yellow / Red */
-	[11][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[11][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-
-	/* Traffic Class 12 - Colors Green / Yellow / Red */
-	[12][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	[12][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
-	},
+	.cman_params = &cman_params,
 #endif /* RTE_SCHED_CMAN */
 };
 
-- 
2.33.0


^ permalink raw reply	[flat|nested] 178+ messages in thread

* Re: [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library
  2021-11-04 14:55                                             ` [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library Thomas Monjalon
                                                                 ` (2 preceding siblings ...)
  2021-11-04 14:55                                               ` [dpdk-dev] [PATCH v24 3/3] examples/ip_pipeline: " Thomas Monjalon
@ 2021-11-04 15:07                                               ` Thomas Monjalon
  3 siblings, 0 replies; 178+ messages in thread
From: Thomas Monjalon @ 2021-11-04 15:07 UTC (permalink / raw)
  To: Wojciech Liguzinski, Cristian Dumitrescu, Jasvinder Singh
  Cc: dev, megha.ajmera, Bruce Richardson, david.marchand, john.mcnamara

04/11/2021 15:55, Thomas Monjalon:
> last changes to make this series "more acceptable":
> - RTE_SCHED_CMAN in rte_config.h, replacing RTE_SCHED_RED
> - test file listed in MAINTAINERS
> - few whitespaces fixed

Applied this last version.

The maintainers of this library didn't help really.
Reviews of basic items and of processes well known to maintainers were not done.
Even after making these last changes myself, this library is still in a poor state.
The most problematic is the big amount of dead code disabled in config/rte_config.h:
	#undef RTE_SCHED_CMAN
	#undef RTE_SCHED_COLLECT_STATS
	#undef RTE_SCHED_SUBPORT_TC_OV
	#define RTE_SCHED_PORT_N_GRINDERS 8
	#undef RTE_SCHED_VECTOR

I think the most appropriate going forward is to stop merging any feature
in the libraries and examples related to sched and pipeline,
until a proper resolution is done for this dead code.
Tips: make runtime options and properly document them.

Note: I probably won't reply to this thread until 21.11 is done,
because I have other priorities and I don't want to hear about this for some time.



^ permalink raw reply	[flat|nested] 178+ messages in thread

end of thread, other threads:[~2021-11-04 15:07 UTC | newest]

Thread overview: 178+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-24 10:58 [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 1/3] sched: add pie based congestion management Liguzinski, WojciechX
2021-05-25  9:16   ` Morten Brørup
2021-06-09  8:36     ` Liguzinski, WojciechX
2021-06-09 12:35       ` Morten Brørup
2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 2/3] example/qos_sched: add pie support Liguzinski, WojciechX
2021-05-24 10:58 ` [dpdk-dev] [RFC PATCH 3/3] example/ip_pipeline: " Liguzinski, WojciechX
2021-05-24 16:19 ` [dpdk-dev] [RFC PATCH 0/3] Add PIE support for HQoS library Stephen Hemminger
2021-05-25  8:56 ` Morten Brørup
2021-06-07 13:01   ` Liguzinski, WojciechX
2021-06-09 10:53 ` [dpdk-dev] [RFC PATCH v1 " Liguzinski, WojciechX
2021-06-09 10:53   ` [dpdk-dev] [RFC PATCH v1 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
2021-06-09 10:53   ` [dpdk-dev] [RFC PATCH v1 2/3] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-06-09 10:53   ` [dpdk-dev] [RFC PATCH v1 3/3] example/ip_pipeline: " Liguzinski, WojciechX
2021-06-15  9:01   ` [dpdk-dev] [RFC PATCH v2 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
2021-06-15  9:01     ` [dpdk-dev] [RFC PATCH v2 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
2021-06-15  9:01     ` [dpdk-dev] [RFC PATCH v2 2/3] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-06-15 12:23       ` Morten Brørup
2021-06-15  9:02     ` [dpdk-dev] [RFC PATCH v2 3/3] example/ip_pipeline: " Liguzinski, WojciechX
2021-06-21  7:35     ` [dpdk-dev] [RFC PATCH v3 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
2021-06-21 18:17         ` Stephen Hemminger
2021-06-22  7:39           ` Liguzinski, WojciechX
2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 2/3] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-06-21  7:35       ` [dpdk-dev] [RFC PATCH v3 3/3] example/ip_pipeline: " Liguzinski, WojciechX
2021-07-05  8:04       ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v4 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
2021-07-16 13:20           ` Dumitrescu, Cristian
2021-07-16 15:11           ` Dumitrescu, Cristian
2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v4 2/3] example/qos_sched: add pie support Liguzinski, WojciechX
2021-07-05  8:04         ` [dpdk-dev] [RFC PATCH v3 3/3] example/ip_pipeline: add PIE support Liguzinski, WojciechX
2021-07-16 12:46         ` [dpdk-dev] [RFC PATCH v4 0/3] Add PIE support for HQoS library Dumitrescu, Cristian
2021-09-07  7:33         ` [dpdk-dev] [RFC PATCH v5 0/5] " Liguzinski, WojciechX
2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-09-07 19:14             ` Stephen Hemminger
2021-09-08  8:49               ` Liguzinski, WojciechX
2021-10-14 15:13               ` Liguzinski, WojciechX
2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 2/5] example/qos_sched: add pie support Liguzinski, WojciechX
2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 3/5] example/ip_pipeline: add PIE support Liguzinski, WojciechX
2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-09-07  7:33           ` [dpdk-dev] [RFC PATCH v5 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-09-07 14:11           ` [dpdk-dev] [RFC PATCH v6 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-09-07 14:11             ` [dpdk-dev] [RFC PATCH v6 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-09-22  7:46             ` [dpdk-dev] [RFC PATCH v7 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-09-22  7:46               ` [dpdk-dev] [RFC PATCH v7 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-09-23  9:45               ` [dpdk-dev] [RFC PATCH v8 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-09-23  9:45                 ` [dpdk-dev] [RFC PATCH v8 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-11  7:55                 ` [dpdk-dev] [PATCH v9 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-12 15:59                     ` Dumitrescu, Cristian
2021-10-12 18:34                       ` Liguzinski, WojciechX
2021-10-14 16:02                         ` Liguzinski, WojciechX
2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-11  7:55                   ` [dpdk-dev] [PATCH v9 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-14 11:34                   ` [dpdk-dev] [PATCH v10 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-14 11:34                     ` [dpdk-dev] [PATCH v10 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-14 12:38                     ` [dpdk-dev] [PATCH v11 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-14 12:38                       ` [dpdk-dev] [PATCH v11 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-14 15:11                       ` [dpdk-dev] [PATCH v12 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-14 15:11                         ` [dpdk-dev] [PATCH v12 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-14 15:33                         ` [dpdk-dev] [PATCH v13 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-14 15:33                           ` [dpdk-dev] [PATCH v13 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-15  8:16                           ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-15 13:51                               ` Dumitrescu, Cristian
2021-10-19  9:34                                 ` Liguzinski, WojciechX
2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-15  8:16                             ` [dpdk-dev] [PATCH v14 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-15 13:56                             ` [dpdk-dev] [PATCH v14 0/5] Add PIE support for HQoS library Dumitrescu, Cristian
2021-10-19  8:26                               ` Liguzinski, WojciechX
2021-10-19  8:18                             ` [dpdk-dev] [PATCH v15 " Liguzinski, WojciechX
2021-10-19  8:18                               ` [dpdk-dev] [PATCH v15 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-19  8:18                               ` [dpdk-dev] [PATCH v15 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-19  8:19                               ` [dpdk-dev] [PATCH v15 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-19  8:19                               ` [dpdk-dev] [PATCH v15 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-19  8:19                               ` [dpdk-dev] [PATCH v15 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-19 12:18                               ` [dpdk-dev] [PATCH v15 0/5] Add PIE support for HQoS library Dumitrescu, Cristian
2021-10-19 12:45                               ` [dpdk-dev] [PATCH v16 " Liguzinski, WojciechX
2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-19 12:45                                 ` [dpdk-dev] [PATCH v16 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-20  7:49                                 ` [dpdk-dev] [PATCH v17 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-10-20  7:49                                   ` [dpdk-dev] [PATCH v17 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-20  7:49                                   ` [dpdk-dev] [PATCH v17 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-20 15:11                                     ` Stephen Hemminger
2021-10-20 18:28                                       ` Liguzinski, WojciechX
2021-10-20  7:50                                   ` [dpdk-dev] [PATCH v17 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-20  7:50                                   ` [dpdk-dev] [PATCH v17 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-20  7:50                                   ` [dpdk-dev] [PATCH v17 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-25 11:32                                   ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-26 21:07                                       ` Singh, Jasvinder
2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-26 21:08                                       ` Singh, Jasvinder
2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-26 21:09                                       ` Singh, Jasvinder
2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-26 21:09                                       ` Singh, Jasvinder
2021-10-25 11:32                                     ` [dpdk-dev] [PATCH v18 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-10-26 21:11                                       ` Singh, Jasvinder
2021-10-26  8:24                                     ` [dpdk-dev] [PATCH v18 0/5] Add PIE support for HQoS library Liu, Yu Y
2021-10-26  8:33                                       ` Thomas Monjalon
2021-10-26 10:02                                         ` Dumitrescu, Cristian
2021-10-26 10:10                                           ` Thomas Monjalon
2021-10-26 10:20                                             ` Liguzinski, WojciechX
2021-10-26 10:25                                               ` Thomas Monjalon
2021-10-28 10:17                                     ` [dpdk-dev] [PATCH v19 " Liguzinski, WojciechX
2021-10-28 10:17                                       ` [dpdk-dev] [PATCH v19 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-10-29 13:44                                         ` Thomas Monjalon
2021-11-02 13:15                                           ` Liguzinski, WojciechX
2021-10-29 13:57                                         ` Thomas Monjalon
2021-10-29 14:06                                           ` Dumitrescu, Cristian
2021-10-29 14:15                                             ` Thomas Monjalon
2021-10-28 10:17                                       ` [dpdk-dev] [PATCH v19 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-10-28 10:18                                       ` [dpdk-dev] [PATCH v19 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-10-28 10:18                                       ` [dpdk-dev] [PATCH v19 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-10-28 10:18                                       ` [dpdk-dev] [PATCH v19 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-11-02 23:57                                       ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Liguzinski, WojciechX
2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 1/5] sched: add PIE based congestion management Liguzinski, WojciechX
2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 2/5] example/qos_sched: add PIE support Liguzinski, WojciechX
2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 3/5] example/ip_pipeline: " Liguzinski, WojciechX
2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 4/5] doc/guides/prog_guide: added PIE Liguzinski, WojciechX
2021-11-02 23:57                                         ` [dpdk-dev] [PATCH v20 5/5] app/test: add tests for PIE Liguzinski, WojciechX
2021-11-03 17:52                                         ` [dpdk-dev] [PATCH v20 0/5] Add PIE support for HQoS library Thomas Monjalon
2021-11-04  8:29                                           ` Liguzinski, WojciechX
2021-11-04 10:40                                         ` [dpdk-dev] [PATCH v21 0/3] " Liguzinski, WojciechX
2021-11-04 10:40                                           ` [dpdk-dev] [PATCH v21 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
2021-11-04 10:40                                           ` [dpdk-dev] [PATCH v21 2/3] examples/qos_sched: add PIE support Liguzinski, WojciechX
2021-11-04 10:40                                           ` [dpdk-dev] [PATCH v21 3/3] examples/ip_pipeline: " Liguzinski, WojciechX
2021-11-04 10:49                                           ` [dpdk-dev] [PATCH v22 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
2021-11-04 10:49                                             ` [dpdk-dev] [PATCH v22 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
2021-11-04 10:49                                             ` [dpdk-dev] [PATCH v22 2/3] examples/qos_sched: add PIE support Liguzinski, WojciechX
2021-11-04 10:49                                             ` [dpdk-dev] [PATCH v22 3/3] examples/ip_pipeline: " Liguzinski, WojciechX
2021-11-04 11:03                                             ` [dpdk-dev] [PATCH v23 0/3] Add PIE support for HQoS library Liguzinski, WojciechX
2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 1/3] sched: add PIE based congestion management Liguzinski, WojciechX
2021-11-04 13:58                                                 ` Thomas Monjalon
2021-11-04 14:24                                                   ` Dumitrescu, Cristian
2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 2/3] examples/qos_sched: add PIE support Liguzinski, WojciechX
2021-11-04 11:03                                               ` [dpdk-dev] [PATCH v23 3/3] examples/ip_pipeline: " Liguzinski, WojciechX
2021-11-04 14:55                                             ` [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library Thomas Monjalon
2021-11-04 14:55                                               ` [dpdk-dev] [PATCH v24 1/3] sched: add PIE based congestion management Thomas Monjalon
2021-11-04 14:55                                               ` [dpdk-dev] [PATCH v24 2/3] examples/qos_sched: support PIE " Thomas Monjalon
2021-11-04 14:55                                               ` [dpdk-dev] [PATCH v24 3/3] examples/ip_pipeline: " Thomas Monjalon
2021-11-04 15:07                                               ` [dpdk-dev] [PATCH v24 0/3] Add PIE support for HQoS library Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).