From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 9E9A2A0C41; Tue, 7 Sep 2021 09:33:46 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 423984111D; Tue, 7 Sep 2021 09:33:46 +0200 (CEST) Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by mails.dpdk.org (Postfix) with ESMTP id DFEDE41104 for ; Tue, 7 Sep 2021 09:33:43 +0200 (CEST) X-IronPort-AV: E=McAfee;i="6200,9189,10099"; a="216965119" X-IronPort-AV: E=Sophos;i="5.85,274,1624345200"; d="scan'208";a="216965119" Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by fmsmga102.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Sep 2021 00:33:43 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.85,274,1624345200"; d="scan'208";a="692338876" Received: from silpixa00400629.ir.intel.com ([10.237.213.30]) by fmsmga006.fm.intel.com with ESMTP; 07 Sep 2021 00:33:41 -0700 From: "Liguzinski, WojciechX" To: dev@dpdk.org, jasvinder.singh@intel.com, cristian.dumitrescu@intel.com Cc: megha.ajmera@intel.com Date: Tue, 7 Sep 2021 07:33:24 +0000 Message-Id: <20210907073328.1498973-2-wojciechx.liguzinski@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20210907073328.1498973-1-wojciechx.liguzinski@intel.com> References: <20210705080421.18736-1-wojciechx.liguzinski@intel.com> <20210907073328.1498973-1-wojciechx.liguzinski@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [RFC PATCH v5 1/5] sched: add PIE based congestion management X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Implement PIE based congestion management based on rfc8033 Signed-off-by: Liguzinski, WojciechX --- drivers/net/softnic/rte_eth_softnic_tm.c | 6 +- 
lib/sched/meson.build | 10 +- lib/sched/rte_pie.c | 82 +++++ lib/sched/rte_pie.h | 393 +++++++++++++++++++++++ lib/sched/rte_sched.c | 228 +++++++++---- lib/sched/rte_sched.h | 53 ++- lib/sched/version.map | 3 + 7 files changed, 685 insertions(+), 90 deletions(-) create mode 100644 lib/sched/rte_pie.c create mode 100644 lib/sched/rte_pie.h diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c index 90baba15ce..5b6c4e6d4b 100644 --- a/drivers/net/softnic/rte_eth_softnic_tm.c +++ b/drivers/net/softnic/rte_eth_softnic_tm.c @@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev, return 0; } -#ifdef RTE_SCHED_RED +#ifdef RTE_SCHED_AQM #define WRED_SUPPORTED 1 #else #define WRED_SUPPORTED 0 @@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, uint32_t tc_id) return NULL; } -#ifdef RTE_SCHED_RED +#ifdef RTE_SCHED_AQM static void wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id) @@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id) for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc_id++) for (color = RTE_COLOR_GREEN; color < RTE_COLORS; color++) { struct rte_red_params *dst = - &pp->red_params[tc_id][color]; + &pp->wred_params[tc_id][color]; struct tm_wred_profile *src_wp = tm_tc_wred_profile_get(dev, tc_id); struct rte_tm_red_params *src = diff --git a/lib/sched/meson.build b/lib/sched/meson.build index b24f7b8775..e7ae9bcf19 100644 --- a/lib/sched/meson.build +++ b/lib/sched/meson.build @@ -1,11 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel Corporation -sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c') -headers = files( - 'rte_approx.h', - 'rte_red.h', - 'rte_sched.h', - 'rte_sched_common.h', -) +sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c') +headers = files('rte_sched.h', 'rte_sched_common.h', + 'rte_red.h', 'rte_approx.h', 'rte_pie.h') deps += ['mbuf', 'meter'] diff --git 
a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c
new file mode 100644
index 0000000000..2fcecb2db4
--- /dev/null
+++ b/lib/sched/rte_pie.c
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include
+
+#include "rte_pie.h"
+#include
+#include
+#include
+
+#ifdef __INTEL_COMPILER
+#pragma warning(disable:2259) /* conversion may lose significant bits */
+#endif
+
+/*
+ * Reset the PIE run-time state of one queue.
+ *
+ * The caller owns the storage. A NULL pointer is logged and ignored: the
+ * argument is passed by value, so memory allocated here could never be
+ * handed back to the caller and would only leak.
+ */
+void
+rte_pie_rt_data_init(struct rte_pie *pie)
+{
+	if (pie == NULL) {
+		RTE_LOG(ERR, SCHED, "%s: Invalid addr for pie\n", __func__);
+		return;
+	}
+
+	pie->active = 0;
+	pie->in_measurement = 0;
+	pie->departed_bytes_count = 0;
+	pie->start_measurement = 0;
+	pie->last_measurement = 0;
+	pie->qlen = 0;
+	pie->qlen_bytes = 0;
+	pie->avg_dq_time = 0;
+	pie->burst_allowance = 0;
+	pie->qdelay_old = 0;
+	pie->drop_prob = 0;
+	pie->accu_prob = 0;
+}
+
+/*
+ * Validate the user parameters (milliseconds / packets) and store them in
+ * pie_cfg, converting the three time values to TSC cycles.
+ *
+ * Returns 0 on success, -EINVAL if pie_cfg is NULL or any parameter is zero.
+ */
+int
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th)
+{
+	uint64_t tsc_hz = rte_get_tsc_hz();
+
+	if (pie_cfg == NULL)
+		return -EINVAL;
+
+	/* The parameters are unsigned, so zero is the only invalid value. */
+	if (qdelay_ref == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for qdelay_ref\n", __func__);
+		return -EINVAL;
+	}
+
+	if (dp_update_interval == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for dp_update_interval\n", __func__);
+		return -EINVAL;
+	}
+
+	if (max_burst == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for max_burst\n", __func__);
+		return -EINVAL;
+	}
+
+	if (tailq_th == 0) {
+		RTE_LOG(ERR, SCHED,
+			"%s: Incorrect value for tailq_th\n", __func__);
+		return -EINVAL;
+	}
+
+	pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000;
+	pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / 1000;
+	pie_cfg->max_burst = (tsc_hz * max_burst) / 1000;
+	pie_cfg->tailq_th = tailq_th;
+
+	return 0;
+}
diff --git a/lib/sched/rte_pie.h
b/lib/sched/rte_pie.h
new file mode 100644
index 0000000000..f83c95664f
--- /dev/null
+++ b/lib/sched/rte_pie.h
@@ -0,0 +1,397 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#ifndef __RTE_PIE_H_INCLUDED__
+#define __RTE_PIE_H_INCLUDED__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Proportional Integral controller Enhanced (PIE)
+ *
+ *
+ ***/
+
+#include
+
+#include
+#include
+
+#define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
+				     * to start measurement cycle (bytes)
+				     */
+#define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
+#define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
+#define RTE_BETA           1.25    /**< Weights in drop probability calculations */
+#define RTE_RAND_MAX       (~0LLU) /**< Max value of the random number */
+
+
+/**
+ * PIE configuration parameters passed by user
+ *
+ */
+struct rte_pie_params {
+	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
+	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
+	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE configuration parameters
+ *
+ */
+struct rte_pie_config {
+	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
+	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
+	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
+	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
+};
+
+/**
+ * PIE run-time data
+ */
+struct rte_pie {
+	uint16_t active;               /**< Flag for activating/deactivating pie */
+	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
+	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
+	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
+	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
+	uint64_t qlen;                 /**< Queue length (packets count) */
+	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
+	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
+	uint32_t burst_allowance;      /**< Current burst allowance (in cpu cycles) */
+	uint64_t qdelay_old;           /**< Previous queue delay estimate (same scale as qdelay_ref) */
+	double drop_prob;              /**< Current packet drop probability */
+	double accu_prob;              /**< Accumulated packet drop probability */
+};
+
+/**
+ * @brief Initialises run-time data
+ *
+ * @param pie [in,out] data pointer to PIE runtime data
+ */
+void
+__rte_experimental
+rte_pie_rt_data_init(struct rte_pie *pie);
+
+/**
+ * @brief Configures a single PIE configuration parameter structure.
+ *
+ * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
+ * @param qdelay_ref [in]  latency target(milliseconds)
+ * @param dp_update_interval [in] update interval for drop probability (milliseconds)
+ * @param max_burst [in] maximum burst allowance (milliseconds)
+ * @param tailq_th [in] tail drop threshold for the queue (number of packets)
+ *
+ * @return Operation status
+ * @retval 0 success
+ * @retval !0 error
+ */
+int
+__rte_experimental
+rte_pie_config_init(struct rte_pie_config *pie_cfg,
+	const uint16_t qdelay_ref,
+	const uint16_t dp_update_interval,
+	const uint16_t max_burst,
+	const uint16_t tailq_th);
+
+/**
+ * @brief Decides packet enqueue when queue is empty
+ *
+ * Note: packet is never dropped in this particular case.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval !0 drop the packet
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len)
+{
+	RTE_ASSERT(pkt_len != 0);
+
+	/* Update the PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/**
+	 * If the queue has been idle for a while, turn off PIE and Reset counters
+	 */
+	if ((pie->active == 1) &&
+		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	return 0;
+}
+
+/**
+ * @brief Update the packet drop probability from the current queue delay
+ * estimate and decrement the remaining burst allowance
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ * @param time [in] current time (measured in cpu cycles)
+ */
+static inline void
+__rte_experimental
+_calc_drop_probability(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie, uint64_t time)
+{
+	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
+
+	/* Multiply before dividing: avg_dq_time / DQ_THRESHOLD truncates to 0.
+	 * NOTE(review): RFC 8033 scales by the queue length in bytes - confirm.
+	 */
+	double current_qdelay = (double)pie->qlen * (double)pie->avg_dq_time / RTE_DQ_THRESHOLD;
+
+	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
+		RTE_BETA * (current_qdelay - pie->qdelay_old);
+
+	if (pie->drop_prob < 0.000001)
+		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
+	else if (pie->drop_prob < 0.00001)
+		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
+	else if (pie->drop_prob < 0.0001)
+		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
+	else if (pie->drop_prob < 0.001)
+		p = p * 0.03125;                    /* (1/32) = 0.03125   */
+	else if (pie->drop_prob < 0.01)
+		p = p * 0.125;                      /* (1/8) = 0.125    */
+	else if (pie->drop_prob < 0.1)
+		p = p * 0.5;                        /* (1/2) = 0.5    */
+
+	if (pie->drop_prob >= 0.1 && p > 0.02)
+		p = 0.02;
+
+	pie->drop_prob += p;
+
+	double qdelay = qdelay_ref * 0.5;
+
+	/*  Exponentially decay drop prob when congestion goes away  */
+	if (current_qdelay < qdelay && pie->qdelay_old < qdelay)
+		pie->drop_prob *= 0.98;        /* 1 - 1/64 is sufficient */
+
+	/* Bound drop probability */
+	if (pie->drop_prob < 0)
+		pie->drop_prob = 0;
+	if (pie->drop_prob > 1)
+		pie->drop_prob = 1;
+
+	pie->qdelay_old = current_qdelay;
+	pie->last_measurement = time;
+
+	/* Unsigned saturating decrement: a plain subtraction would wrap around */
+	if (pie->burst_allowance > pie_cfg->dp_update_interval)
+		pie->burst_allowance -= pie_cfg->dp_update_interval;
+	else
+		pie->burst_allowance = 0;
+}
+
+/**
+ * @brief make a decision to drop or enqueue a packet based on probability
+ *        criteria
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in, out] data pointer to PIE runtime data
+ *
+ * @return operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet
+ */
+static inline int
+__rte_experimental
+_rte_pie_drop(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie)
+{
+	double rand_value;
+	double qdelay = pie_cfg->qdelay_ref * 0.5;
+
+	/* PIE is active but the queue is not congested: return 0 */
+	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
+		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
+		return 0;
+
+	if (pie->drop_prob == 0)
+		pie->accu_prob = 0;
+
+	/* For practical reasons, drop probability can be further scaled according
+	 * to packet size, but one needs to set a bound to avoid unnecessary bias
+	 * Random drop
+	 */
+	pie->accu_prob += pie->drop_prob;
+
+	if (pie->accu_prob < 0.85)
+		return 0;
+
+	if (pie->accu_prob >= 8.5)
+		return 1;
+
+	/* Uniform draw in [0, 1]; dividing as integers would truncate it to 0 */
+	rand_value = (double)rte_rand() / (double)RTE_RAND_MAX;
+
+	if (rand_value < pie->drop_prob) {
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped for non-empty queue
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on mark probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	/* Check queue space against the tail drop threshold */
+	if (pie->qlen >= pie_cfg->tailq_th) {
+
+		pie->accu_prob = 0;
+		return 1;
+	}
+
+	if (pie->active) {
+		/* Update drop probability after certain interval */
+		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
+			_calc_drop_probability(pie_cfg, pie, time);
+
+		/* Decide whether packet to be dropped or enqueued */
+		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
+			return 2;
+	}
+
+	/* When queue occupancy is over a certain threshold, turn on PIE */
+	if ((pie->active == 0) &&
+		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
+		pie->active = 1;
+		pie->qdelay_old = 0;
+		pie->drop_prob = 0;
+		pie->in_measurement = 1;
+		pie->departed_bytes_count = 0;
+		pie->avg_dq_time = 0;
+		pie->last_measurement = time;
+		pie->burst_allowance = pie_cfg->max_burst;
+		pie->accu_prob = 0;
+		pie->start_measurement = time;
+	}
+
+	/* when queue has been idle for a while, turn off PIE and Reset counters */
+	if (pie->active == 1 &&
+		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
+		pie->active =  0;
+		pie->in_measurement = 0;
+	}
+
+	/* Update PIE qlen parameter */
+	pie->qlen++;
+	pie->qlen_bytes += pkt_len;
+
+	/* No drop */
+	return 0;
+}
+
+/**
+ * @brief Decides if new packet should be enqueued or dropped
+ * Updates run time data and gives verdict whether to enqueue or drop the packet.
+ *
+ * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
+ * @param pie [in,out] data pointer to PIE runtime data
+ * @param qlen [in] queue length
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp (measured in cpu cycles)
+ *
+ * @return Operation status
+ * @retval 0 enqueue the packet
+ * @retval 1 drop the packet based on max threshold criterion
+ * @retval 2 drop the packet based on drop probability criterion
+ */
+static inline int
+__rte_experimental
+rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
+	struct rte_pie *pie,
+	const unsigned int qlen,
+	uint32_t pkt_len,
+	const uint64_t time)
+{
+	RTE_ASSERT(pie_cfg != NULL);
+	RTE_ASSERT(pie != NULL);
+
+	if (qlen != 0)
+		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
+	else
+		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
+}
+
+/**
+ * @brief PIE rate estimation method
+ * Called on each packet departure.
+ *
+ * @param pie [in] data pointer to PIE runtime data
+ * @param pkt_len [in] packet length in bytes
+ * @param time [in] current time stamp in cpu cycles
+ */
+static inline void
+__rte_experimental
+rte_pie_dequeue(struct rte_pie *pie,
+	uint32_t pkt_len,
+	uint64_t time)
+{
+	/* Dequeue rate estimation */
+	if (pie->in_measurement) {
+		pie->departed_bytes_count += pkt_len;
+
+		/* Start a new measurement cycle when enough packets */
+		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
+			uint64_t dq_time = time - pie->start_measurement;
+
+			if (pie->avg_dq_time == 0)
+				pie->avg_dq_time = dq_time;
+			else
+				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
+					* (1 - RTE_DQ_WEIGHT);
+
+			pie->in_measurement = 0;
+		}
+	}
+
+	/* Start measurement cycle when enough data in the queue */
+	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
+		pie->in_measurement = 1;
+		pie->start_measurement = time;
+		pie->departed_bytes_count = 0;
+	}
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PIE_H_INCLUDED__ */
diff --git a/lib/sched/rte_sched.c
b/lib/sched/rte_sched.c index a858f61f95..320435ed91 100644 --- a/lib/sched/rte_sched.c +++ b/lib/sched/rte_sched.c @@ -89,8 +89,12 @@ struct rte_sched_queue { struct rte_sched_queue_extra { struct rte_sched_queue_stats stats; -#ifdef RTE_SCHED_RED - struct rte_red red; +#ifdef RTE_SCHED_AQM + RTE_STD_C11 + union { + struct rte_red red; + struct rte_pie pie; + }; #endif }; @@ -183,8 +187,13 @@ struct rte_sched_subport { /* Pipe queues size */ uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; -#ifdef RTE_SCHED_RED - struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS]; + enum rte_sched_aqm_mode aqm; +#ifdef RTE_SCHED_AQM + RTE_STD_C11 + union { + struct rte_red_config wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS]; + struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + }; #endif /* Scheduling loop detection */ @@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports) rte_free(port); } +#ifdef RTE_SCHED_AQM + +static int +rte_sched_red_config(struct rte_sched_port *port, + struct rte_sched_subport *s, + struct rte_sched_subport_params *params, + uint32_t n_subports) +{ + uint32_t i; + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + + uint32_t j; + + for (j = 0; j < RTE_COLORS; j++) { + /* if min/max are both zero, then RED is disabled */ + if ((params->wred_params[i][j].min_th | + params->wred_params[i][j].max_th) == 0) { + continue; + } + + if (rte_red_config_init(&s->wred_config[i][j], + params->wred_params[i][j].wq_log2, + params->wred_params[i][j].min_th, + params->wred_params[i][j].max_th, + params->wred_params[i][j].maxp_inv) != 0) { + rte_sched_free_memory(port, n_subports); + + RTE_LOG(NOTICE, SCHED, + "%s: RED configuration init fails\n", __func__); + return -EINVAL; + } + } + } + s->aqm = RTE_SCHED_AQM_WRED; + return 0; +} + +static int +rte_sched_pie_config(struct rte_sched_port *port, + struct rte_sched_subport *s, + struct 
rte_sched_subport_params *params, + uint32_t n_subports) +{ + uint32_t i; + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + if (params->pie_params[i].tailq_th > params->qsize[i]) { + RTE_LOG(NOTICE, SCHED, + "%s: PIE tailq threshold incorrect\n", __func__); + return -EINVAL; + } + + if (rte_pie_config_init(&s->pie_config[i], + params->pie_params[i].qdelay_ref, + params->pie_params[i].dp_update_interval, + params->pie_params[i].max_burst, + params->pie_params[i].tailq_th) != 0) { + rte_sched_free_memory(port, n_subports); + + RTE_LOG(NOTICE, SCHED, + "%s: PIE configuration init fails\n", __func__); + return -EINVAL; + } + } + s->aqm = RTE_SCHED_AQM_PIE; + return 0; +} + +static int +rte_sched_aqm_config(struct rte_sched_port *port, + struct rte_sched_subport *s, + struct rte_sched_subport_params *params, + uint32_t n_subports) +{ + if (params->aqm == RTE_SCHED_AQM_WRED) + return rte_sched_red_config(port, s, params, n_subports); + + else if (params->aqm == RTE_SCHED_AQM_PIE) + return rte_sched_pie_config(port, s, params, n_subports); + + return -EINVAL; +} +#endif + int rte_sched_subport_config(struct rte_sched_port *port, uint32_t subport_id, @@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct rte_sched_port *port, s->n_pipe_profiles = params->n_pipe_profiles; s->n_max_pipe_profiles = params->n_max_pipe_profiles; -#ifdef RTE_SCHED_RED - for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { - uint32_t j; - - for (j = 0; j < RTE_COLORS; j++) { - /* if min/max are both zero, then RED is disabled */ - if ((params->red_params[i][j].min_th | - params->red_params[i][j].max_th) == 0) { - continue; - } - - if (rte_red_config_init(&s->red_config[i][j], - params->red_params[i][j].wq_log2, - params->red_params[i][j].min_th, - params->red_params[i][j].max_th, - params->red_params[i][j].maxp_inv) != 0) { - RTE_LOG(NOTICE, SCHED, - "%s: RED configuration init fails\n", - __func__); - ret = -EINVAL; - goto out; - } - } +#ifdef RTE_SCHED_AQM + status = 
rte_sched_aqm_config(port, s, params, n_subports); + if (status) { + RTE_LOG(NOTICE, SCHED, "%s: AQM configuration fails\n", __func__); + return status; } #endif @@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct rte_sched_port *port, subport->stats.n_bytes_tc[tc_index] += pkt_len; } -#ifdef RTE_SCHED_RED -static inline void -rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port, - struct rte_sched_subport *subport, - uint32_t qindex, - struct rte_mbuf *pkt, - uint32_t red) -#else static inline void rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port, struct rte_sched_subport *subport, uint32_t qindex, struct rte_mbuf *pkt, - __rte_unused uint32_t red) -#endif + __rte_unused uint32_t drops) { uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex); uint32_t pkt_len = pkt->pkt_len; subport->stats.n_pkts_tc_dropped[tc_index] += 1; subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len; -#ifdef RTE_SCHED_RED - subport->stats.n_pkts_red_dropped[tc_index] += red; +#ifdef RTE_SCHED_AQM + subport->stats.n_pkts_aqm_dropped[tc_index] += drops; #endif } @@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct rte_sched_subport *subport, qe->stats.n_bytes += pkt_len; } -#ifdef RTE_SCHED_RED -static inline void -rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport, - uint32_t qindex, - struct rte_mbuf *pkt, - uint32_t red) -#else static inline void rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport, uint32_t qindex, struct rte_mbuf *pkt, - __rte_unused uint32_t red) -#endif + __rte_unused uint32_t drops) { struct rte_sched_queue_extra *qe = subport->queue_extra + qindex; uint32_t pkt_len = pkt->pkt_len; qe->stats.n_pkts_dropped += 1; qe->stats.n_bytes_dropped += pkt_len; -#ifdef RTE_SCHED_RED - qe->stats.n_pkts_red_dropped += red; +#ifdef RTE_SCHED_AQM + qe->stats.n_pkts_aqm_dropped += drops; #endif } #endif /* RTE_SCHED_COLLECT_STATS */ -#ifdef RTE_SCHED_RED +#ifdef 
RTE_SCHED_AQM static inline int -rte_sched_port_red_drop(struct rte_sched_port *port, +rte_sched_port_aqm_drop(struct rte_sched_port *port, struct rte_sched_subport *subport, struct rte_mbuf *pkt, uint32_t qindex, uint16_t qlen) { struct rte_sched_queue_extra *qe; - struct rte_red_config *red_cfg; - struct rte_red *red; uint32_t tc_index; - enum rte_color color; tc_index = rte_sched_port_pipe_tc(port, qindex); - color = rte_sched_port_pkt_read_color(pkt); - red_cfg = &subport->red_config[tc_index][color]; + qe = subport->queue_extra + qindex; - if ((red_cfg->min_th | red_cfg->max_th) == 0) - return 0; + /* WRED */ + if (subport->aqm == RTE_SCHED_AQM_WRED) { + struct rte_red_config *red_cfg; + struct rte_red *red; + enum rte_color color; - qe = subport->queue_extra + qindex; - red = &qe->red; + color = rte_sched_port_pkt_read_color(pkt); + red_cfg = &subport->wred_config[tc_index][color]; + + if ((red_cfg->min_th | red_cfg->max_th) == 0) + return 0; - return rte_red_enqueue(red_cfg, red, qlen, port->time); + red = &qe->red; + + return rte_red_enqueue(red_cfg, red, qlen, port->time); + } + + /* PIE: rte_pie_enqueue() takes (cfg, pie, qlen, pkt_len, time) in that order */ + struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index]; + struct rte_pie *pie = &qe->pie; + + return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles); } static inline void @@ -1815,14 +1885,29 @@ rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, struct rte_sched_subport *subport, uint32_t qindex) { struct rte_sched_queue_extra *qe = subport->queue_extra + qindex; - struct rte_red *red = &qe->red; + if (subport->aqm == RTE_SCHED_AQM_WRED) { + struct rte_red *red = &qe->red; + + rte_red_mark_queue_empty(red, port->time); + } +} + +static inline void +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport, +uint32_t qindex, uint32_t pkt_len, uint64_t time) { + struct rte_sched_queue_extra *qe = subport->queue_extra + qindex; + struct rte_pie *pie = &qe->pie; - rte_red_mark_queue_empty(red, port->time); + /* 
Update queue length; pkt_len must be the byte count that enqueue added */ + pie->qlen -= 1; + pie->qlen_bytes -= pkt_len; + + rte_pie_dequeue(pie, pkt_len, time); } #else -static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused, +static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port __rte_unused, struct rte_sched_subport *subport __rte_unused, struct rte_mbuf *pkt __rte_unused, uint32_t qindex __rte_unused, @@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unus #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex) -#endif /* RTE_SCHED_RED */ +#endif /* RTE_SCHED_AQM */ #ifdef RTE_SCHED_DEBUG @@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct rte_sched_port *port, qlen = q->qw - q->qr; /* Drop the packet (and update drop stats) when queue is full */ - if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) || + if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, qlen) || (qlen >= qsize))) { rte_pktmbuf_free(pkt); #ifdef RTE_SCHED_COLLECT_STATS @@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port, { struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_queue *queue = grinder->queue[grinder->qpos]; + uint32_t qindex = grinder->qindex[grinder->qpos]; struct rte_mbuf *pkt = grinder->pkt; uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; uint32_t be_tc_active; @@ -2421,15 +2507,20 @@ grinder_schedule(struct rte_sched_port *port, (pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active; if (queue->qr == queue->qw) { - uint32_t qindex = grinder->qindex[grinder->qpos]; - rte_bitmap_clear(subport->bmp, qindex); grinder->qmask &= ~(1 << grinder->qpos); if (be_tc_active) grinder->wrr_mask[grinder->qpos] = 0; + rte_sched_port_set_queue_empty_timestamp(port, subport, qindex); } +#ifdef RTE_SCHED_AQM + /* Enqueue accounts pkt->pkt_len, so do not add the frame overhead here */ + if (subport->aqm == RTE_SCHED_AQM_PIE) + rte_sched_port_pie_dequeue(subport, qindex, pkt->pkt_len, port->time_cpu_cycles); +#endif + /* Reset pipe 
loop detection */ subport->pipe_loop = RTE_SCHED_PIPE_INVALID; grinder->productive = 1; diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h index c1a772b70c..a5fe6266cd 100644 --- a/lib/sched/rte_sched.h +++ b/lib/sched/rte_sched.h @@ -61,9 +61,10 @@ extern "C" { #include #include -/** Random Early Detection (RED) */ -#ifdef RTE_SCHED_RED +/** Active Queue Management */ +#ifdef RTE_SCHED_AQM #include "rte_red.h" +#include "rte_pie.h" #endif /** Maximum number of queues per pipe. @@ -110,6 +111,28 @@ extern "C" { #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT 24 #endif +/** + * Active Queue Management (AQM) mode + * + * This is used for controlling the admission of packets into a packet queue or + * group of packet queues on congestion. + * + * The *Random Early Detection (RED)* algorithm works by proactively dropping + * more and more input packets as the queue occupancy builds up. When the queue + * is full or almost full, RED effectively works as *tail drop*. The *Weighted + * RED* algorithm uses a separate set of RED thresholds for each packet color. + * + * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly + * drops a packet at the onset of the congestion and tries to control the + * latency around the target value. The congestion detection, however, is based + * on the queueing latency instead of the queue length like RED. For more + * information, refer RFC8033. + */ +enum rte_sched_aqm_mode { + RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection (WRED) */ + RTE_SCHED_AQM_PIE, /**< Proportional Integral Controller Enhanced (PIE) */ +}; + /* * Pipe configuration parameters. 
The period and credits_per_period * parameters are measured in bytes, with one byte meaning the time @@ -174,9 +197,17 @@ struct rte_sched_subport_params { /** Max allowed profiles in the pipe profile table */ uint32_t n_max_pipe_profiles; -#ifdef RTE_SCHED_RED - /** RED parameters */ - struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS]; +#ifdef RTE_SCHED_AQM + /** Active Queue Management mode */ + enum rte_sched_aqm_mode aqm; + + RTE_STD_C11 + union { + /** WRED parameters */ + struct rte_red_params wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS]; + /** PIE parameters */ + struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + }; #endif }; @@ -208,9 +239,9 @@ struct rte_sched_subport_stats { /** Number of bytes dropped for each traffic class */ uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; -#ifdef RTE_SCHED_RED - /** Number of packets dropped by red */ - uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; +#ifdef RTE_SCHED_AQM + /** Number of packets dropped by active queue management scheme */ + uint64_t n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; #endif }; @@ -222,9 +253,9 @@ struct rte_sched_queue_stats { /** Packets dropped */ uint64_t n_pkts_dropped; -#ifdef RTE_SCHED_RED - /** Packets dropped by RED */ - uint64_t n_pkts_red_dropped; +#ifdef RTE_SCHED_AQM + /** Packets dropped by active queue management scheme */ + uint64_t n_pkts_aqm_dropped; #endif /** Bytes successfully written */ diff --git a/lib/sched/version.map b/lib/sched/version.map index ace284b7de..3422821ac8 100644 --- a/lib/sched/version.map +++ b/lib/sched/version.map @@ -30,4 +30,7 @@ EXPERIMENTAL { rte_sched_subport_pipe_profile_add; # added in 20.11 rte_sched_port_subport_profile_add; + + rte_pie_rt_data_init; + rte_pie_config_init; }; -- 2.25.1