From: "Robertson, Alan" <ar771e@intl.att.com>
To: "'alangordondewar@gmail.com'" <alangordondewar@gmail.com>,
"'cristian.dumitrescu@intel.com'" <cristian.dumitrescu@intel.com>
Cc: "'dev@dpdk.org'" <dev@dpdk.org>, "'Alan Dewar'" <alan.dewar@att.com>
Subject: Re: [dpdk-dev] [RFC] sched: parameterize QoS traffic-classes and queues
Date: Thu, 5 Oct 2017 10:19:11 +0000 [thread overview]
Message-ID: <051E977EFCB14842A3BEA5680BAFC91B53B3D7@gbcdcmbx03.intl.att.com> (raw)
In-Reply-To: <1507195258-14766-1-git-send-email-alan.dewar@att.com>
Hi Alan,
Comments inline, search for AGR>
Alan.
From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of alangordondewar@gmail.com
Sent: Thursday, October 05, 2017 10:21 AM
To: cristian.dumitrescu@intel.com
Cc: dev@dpdk.org; Alan Dewar <alan.dewar@att.com>
Subject: [dpdk-dev] [RFC] sched: parameterize QoS traffic-classes and queues
From: Alan Dewar <alan.dewar@att.com>
The DPDK QoS framework has a hierarchy of QoS scheduling elements: port, subport, pipe, traffic-class and queue. The first two levels of the hierarchy (port and subport) are flexible in the number of child nodes that each parent can have, but from the pipe layer down the number of child nodes is hard-coded as four.
These proposed changes allow these hard-coded limits to be modified by changing a couple of compile-time constants.
The default configuration remains as four TCs and four queues.
The sched_autotest passes successfully with the default configuration.
Real world testing has included 2 x 4, 4 x 4 and 4 x 8 (TCs x queues) configurations.
Signed-off-by: Alan Dewar <alan.dewar@att.com>
---
lib/librte_sched/rte_sched.c | 412 ++++++++++++++++++++----------------
lib/librte_sched/rte_sched.h | 27 ++-
lib/librte_sched/rte_sched_common.h | 16 ++
3 files changed, 268 insertions(+), 187 deletions(-)
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index b7cba11..d540553 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -65,8 +65,7 @@
#endif
#define RTE_SCHED_TB_RATE_CONFIG_ERR (1e-7)
-#define RTE_SCHED_WRR_SHIFT 3
-#define RTE_SCHED_GRINDER_PCACHE_SIZE (64 / RTE_SCHED_QUEUES_PER_PIPE)
+#define RTE_SCHED_GRINDER_PCACHE_SIZE 4
#define RTE_SCHED_PIPE_INVALID UINT32_MAX
#define RTE_SCHED_BMP_POS_INVALID UINT32_MAX
@@ -165,12 +164,12 @@ enum grinder_state {
* by scheduler enqueue.
*/
struct rte_sched_port_hierarchy {
- uint16_t queue:2; /**< Queue ID (0 .. 3) */
- uint16_t traffic_class:2; /**< Traffic class ID (0 .. 3)*/
- uint32_t color:2; /**< Color */
- uint16_t unused:10;
- uint16_t subport; /**< Subport ID */
- uint32_t pipe; /**< Pipe ID */
+ uint16_t queue:RTE_SCHED_WRR_SHIFT; /**< Queue ID */
+ uint16_t traffic_class:RTE_SCHED_TC_SHIFT; /**< Traffic class ID */
+ uint16_t color:2; /**< Color */
+ uint32_t unused:16 - (2 + RTE_SCHED_WRR_SHIFT + RTE_SCHED_TC_SHIFT);
+ uint16_t subport; /**< Subport ID */
+ uint32_t pipe; /**< Pipe ID */
};
struct rte_sched_grinder {
@@ -196,9 +195,9 @@ struct rte_sched_grinder {
/* Current TC */
uint32_t tc_index;
- struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
- struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
- uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ struct rte_sched_queue *queue[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+ struct rte_mbuf **qbase[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+ uint32_t qindex[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
uint16_t qsize;
uint32_t qmask;
uint32_t qpos;
@@ -219,7 +218,7 @@ struct rte_sched_port {
uint32_t frame_overhead;
uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
uint32_t n_pipe_profiles;
- uint32_t pipe_tc3_rate_max;
+ uint32_t pipe_low_prio_tc_rate_max;
#ifdef RTE_SCHED_RED
struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS];
#endif
@@ -289,8 +288,8 @@ rte_sched_port_queues_per_port(struct rte_sched_port *port) static inline struct rte_mbuf ** rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex) {
- uint32_t pindex = qindex >> 4;
- uint32_t qpos = qindex & 0xF;
+ uint32_t pindex = qindex >> RTE_SCHED_TC_WRR_SHIFT;
+ uint32_t qpos = qindex & RTE_SCHED_TC_WRR_MASK;
return (port->queue_array + pindex *
port->qsize_sum + port->qsize_add[qpos]); @@ -299,7 +298,7 @@ rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex) static inline uint16_t rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex) {
- uint32_t tc = (qindex >> 2) & 0x3;
+ uint32_t tc = (qindex >> RTE_SCHED_WRR_SHIFT) & RTE_SCHED_TC_MASK;
return port->qsize[tc];
}
@@ -373,7 +372,7 @@ rte_sched_port_check_params(struct rte_sched_port_params *params)
return -13;
#ifdef RTE_SCHED_SUBPORT_TC_OV
- /* TC3 oversubscription weight: non-zero */
+ /* Lowest priority TC oversubscription weight: non-zero */
if (p->tc_ov_weight == 0)
return -14;
#endif
@@ -471,43 +470,81 @@ rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params) static void rte_sched_port_config_qsize(struct rte_sched_port *port) {
- /* TC 0 */
- port->qsize_add[0] = 0;
- port->qsize_add[1] = port->qsize_add[0] + port->qsize[0];
- port->qsize_add[2] = port->qsize_add[1] + port->qsize[0];
- port->qsize_add[3] = port->qsize_add[2] + port->qsize[0];
-
- /* TC 1 */
- port->qsize_add[4] = port->qsize_add[3] + port->qsize[0];
- port->qsize_add[5] = port->qsize_add[4] + port->qsize[1];
- port->qsize_add[6] = port->qsize_add[5] + port->qsize[1];
- port->qsize_add[7] = port->qsize_add[6] + port->qsize[1];
-
- /* TC 2 */
- port->qsize_add[8] = port->qsize_add[7] + port->qsize[1];
- port->qsize_add[9] = port->qsize_add[8] + port->qsize[2];
- port->qsize_add[10] = port->qsize_add[9] + port->qsize[2];
- port->qsize_add[11] = port->qsize_add[10] + port->qsize[2];
-
- /* TC 3 */
- port->qsize_add[12] = port->qsize_add[11] + port->qsize[2];
- port->qsize_add[13] = port->qsize_add[12] + port->qsize[3];
- port->qsize_add[14] = port->qsize_add[13] + port->qsize[3];
- port->qsize_add[15] = port->qsize_add[14] + port->qsize[3];
-
- port->qsize_sum = port->qsize_add[15] + port->qsize[3];
+ uint32_t tc;
+ uint32_t q;
+ uint32_t index;
+
+ for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
+ for (q = 0; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; q++) {
+ index = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + q;
AGR> Why the complex operation? Isn't this just index++?
+ if (index == 0)
+ port->qsize_add[index] = 0;
AGR> index will only be 0 the first time through the loops, so why not just do this
assignment unconditionally before the for loops?
+ else if (q == 0)
+ port->qsize_add[index] =
+ port->qsize_add[index - 1] +
+ port->qsize[tc - 1];
+ else
+ port->qsize_add[index] =
+ port->qsize_add[index - 1] +
+ port->qsize[tc];
+ }
+ }
+ port->qsize_sum = port->qsize_add[index] +
+ port->qsize[RTE_SCHED_MAX_TC];
+}
+
+static char *
+rte_sched_build_credit_array_string(uint32_t *tc_credits_per_period,
+ char *output_str)
+{
+ uint32_t tc;
+ int str_len;
+
+ str_len = sprintf(output_str, "[");
+ for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
+ str_len += sprintf(output_str + str_len, "%u",
+ tc_credits_per_period[tc]);
+ if (tc != RTE_SCHED_MAX_TC)
+ str_len += sprintf(output_str + str_len, ", ");
+ }
+ str_len += sprintf(output_str + str_len, "]");
+ return output_str;
+}
+
+static char *
+rte_sched_build_wrr_cost_string(struct rte_sched_pipe_profile *p,
+ char *output_str)
+{
+ uint32_t wrr;
+ int str_len;
+
+ str_len = sprintf(output_str, "[");
+ for (wrr = 0; wrr < RTE_SCHED_QUEUES_PER_PIPE; wrr++) {
+ str_len += sprintf(output_str + str_len, "%hhu",
+ p->wrr_cost[wrr]);
+ if (wrr != RTE_SCHED_QUEUES_PER_PIPE - 1)
+ str_len += sprintf(output_str + str_len, ", ");
+ }
+ str_len += sprintf(output_str + str_len, "]");
+ return output_str;
}
static void
rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i) {
struct rte_sched_pipe_profile *p = port->pipe_profiles + i;
+ char credits_str[(13 * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + 3];
+ char wrr_cost_str[(4 * RTE_SCHED_QUEUES_PER_PIPE) + 3];
+
+ rte_sched_build_credit_array_string(p->tc_credits_per_period,
+ credits_str);
+ rte_sched_build_wrr_cost_string(p, wrr_cost_str);
RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
" Token bucket: period = %u, credits per period = %u, size = %u\n"
- " Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n"
+ " Traffic classes: period = %u, credits per period = %s\n"
" Traffic class 3 oversubscription: weight = %hhu\n"
- " WRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n",
+ " WRR cost: %s\n",
i,
/* Token bucket */
@@ -517,19 +554,13 @@ rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i)
/* Traffic classes */
p->tc_period,
- p->tc_credits_per_period[0],
- p->tc_credits_per_period[1],
- p->tc_credits_per_period[2],
- p->tc_credits_per_period[3],
+ credits_str,
/* Traffic class 3 oversubscription */
p->tc_ov_weight,
/* WRR */
- p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3],
- p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7],
- p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11],
- p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]);
+ wrr_cost_str);
}
static inline uint64_t
@@ -581,41 +612,56 @@ rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port, struct rte
/* WRR */
for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
- uint32_t lcd, lcd1, lcd2;
+ uint32_t lcd[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+ uint32_t lcd_elements;
uint32_t qindex;
+ uint32_t q;
qindex = j * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+ for (q = 0; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+ q++) {
+ lcd[q] = src->wrr_weights[qindex + q];
+ wrr_cost[q] = lcd[q];
+ }
- wrr_cost[0] = src->wrr_weights[qindex];
- wrr_cost[1] = src->wrr_weights[qindex + 1];
- wrr_cost[2] = src->wrr_weights[qindex + 2];
- wrr_cost[3] = src->wrr_weights[qindex + 3];
-
- lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
- lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
- lcd = rte_get_lcd(lcd1, lcd2);
-
- wrr_cost[0] = lcd / wrr_cost[0];
- wrr_cost[1] = lcd / wrr_cost[1];
- wrr_cost[2] = lcd / wrr_cost[2];
- wrr_cost[3] = lcd / wrr_cost[3];
+ /*
+ * Calculate the LCD of an array of wrr_costs.
+ * The number of elements in the array must be a power
+ * of two. Calculate the LCD of two adjacent values,
+ * store the results back in the array, each time
+ * around the while loop halves the number of active
+ * elements in the array.
+ * The answer eventually appears in lcd[0].
+ */
+ lcd_elements = RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+ while (lcd_elements > 1) {
+ for (q = 0;
+ q < lcd_elements;
+ q += 2) {
+ lcd[q/2] = rte_get_lcd(lcd[q],
+ lcd[q + 1]);
+ }
+ lcd_elements >>= 1;
+ }
- dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0];
- dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1];
- dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2];
- dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3];
+ for (q = 0; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+ q++) {
+ wrr_cost[q] = lcd[0] / wrr_cost[q];
+ dst->wrr_cost[qindex + q] =
+ (uint8_t) wrr_cost[q];
+ }
}
rte_sched_port_log_pipe_profile(port, i);
}
- port->pipe_tc3_rate_max = 0;
+ port->pipe_low_prio_tc_rate_max = 0;
for (i = 0; i < port->n_pipe_profiles; i++) {
struct rte_sched_pipe_params *src = params->pipe_profiles + i;
- uint32_t pipe_tc3_rate = src->tc_rate[3];
+ uint32_t pipe_low_prio_tc_rate = src->tc_rate[RTE_SCHED_MAX_TC];
- if (port->pipe_tc3_rate_max < pipe_tc3_rate)
- port->pipe_tc3_rate_max = pipe_tc3_rate;
+ if (port->pipe_low_prio_tc_rate_max < pipe_low_prio_tc_rate)
+ port->pipe_low_prio_tc_rate_max = pipe_low_prio_tc_rate;
}
}
@@ -765,10 +811,14 @@ static void
rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i) {
struct rte_sched_subport *s = port->subport + i;
+ char credits_str[(13 * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + 3];
+
+ rte_sched_build_credit_array_string(s->tc_credits_per_period,
+ credits_str);
RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n"
" Token bucket: period = %u, credits per period = %u, size = %u\n"
- " Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n"
+ " Traffic classes: period = %u, credits per period = %s\n"
" Traffic class 3 oversubscription: wm min = %u, wm max = %u\n",
i,
@@ -779,10 +829,7 @@ rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i)
/* Traffic classes */
s->tc_period,
- s->tc_credits_per_period[0],
- s->tc_credits_per_period[1],
- s->tc_credits_per_period[2],
- s->tc_credits_per_period[3],
+ credits_str,
/* Traffic class 3 oversubscription */
s->tc_ov_wm_min,
@@ -849,8 +896,8 @@ rte_sched_subport_config(struct rte_sched_port *port, #ifdef RTE_SCHED_SUBPORT_TC_OV
/* TC oversubscription */
s->tc_ov_wm_min = port->mtu;
- s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(params->tc_period,
- port->pipe_tc3_rate_max);
+ s->tc_ov_wm_max = rte_sched_time_ms_to_bytes
+ (params->tc_period, port->pipe_low_prio_tc_rate_max);
s->tc_ov_wm = s->tc_ov_wm_max;
s->tc_ov_period_id = 0;
s->tc_ov = 0;
@@ -897,21 +944,27 @@ rte_sched_pipe_config(struct rte_sched_port *port,
params = port->pipe_profiles + p->profile;
#ifdef RTE_SCHED_SUBPORT_TC_OV
- double subport_tc3_rate = (double) s->tc_credits_per_period[3]
+ double subport_low_prio_tc_rate;
+ double pipe_low_prio_tc_rate;
+ uint32_t low_prio_tc_ov = s->tc_ov;
+
+ subport_low_prio_tc_rate =
+ (double) s->tc_credits_per_period[RTE_SCHED_MAX_TC]
/ (double) s->tc_period;
- double pipe_tc3_rate = (double) params->tc_credits_per_period[3]
+ pipe_low_prio_tc_rate =
+ (double) params->tc_credits_per_period[RTE_SCHED_MAX_TC]
/ (double) params->tc_period;
- uint32_t tc3_ov = s->tc_ov;
/* Unplug pipe from its subport */
s->tc_ov_n -= params->tc_ov_weight;
- s->tc_ov_rate -= pipe_tc3_rate;
- s->tc_ov = s->tc_ov_rate > subport_tc3_rate;
+ s->tc_ov_rate -= pipe_low_prio_tc_rate;
+ s->tc_ov = s->tc_ov_rate > subport_low_prio_tc_rate;
- if (s->tc_ov != tc3_ov) {
+ if (s->tc_ov != low_prio_tc_ov) {
RTE_LOG(DEBUG, SCHED,
- "Subport %u TC3 oversubscription is OFF (%.4lf >= %.4lf)\n",
- subport_id, subport_tc3_rate, s->tc_ov_rate);
+ "Subport %u TC%u oversubscription is OFF (%.4lf >= %.4lf)\n",
+ subport_id, RTE_SCHED_MAX_TC,
+ subport_low_prio_tc_rate, s->tc_ov_rate);
}
#endif
@@ -937,21 +990,27 @@ rte_sched_pipe_config(struct rte_sched_port *port,
#ifdef RTE_SCHED_SUBPORT_TC_OV
{
- /* Subport TC3 oversubscription */
- double subport_tc3_rate = (double) s->tc_credits_per_period[3]
+ /* Subport lowest priority TC oversubscription */
+ double subport_low_prio_tc_rate;
+ double pipe_low_prio_tc_rate;
+ uint32_t low_prio_tc_ov = s->tc_ov;
+
+ subport_low_prio_tc_rate =
+ (double) s->tc_credits_per_period[RTE_SCHED_MAX_TC]
/ (double) s->tc_period;
- double pipe_tc3_rate = (double) params->tc_credits_per_period[3]
+ pipe_low_prio_tc_rate =
+ (double) params->tc_credits_per_period[RTE_SCHED_MAX_TC]
/ (double) params->tc_period;
- uint32_t tc3_ov = s->tc_ov;
s->tc_ov_n += params->tc_ov_weight;
- s->tc_ov_rate += pipe_tc3_rate;
- s->tc_ov = s->tc_ov_rate > subport_tc3_rate;
+ s->tc_ov_rate += pipe_low_prio_tc_rate;
+ s->tc_ov = s->tc_ov_rate > subport_low_prio_tc_rate;
- if (s->tc_ov != tc3_ov) {
+ if (s->tc_ov != low_prio_tc_ov) {
RTE_LOG(DEBUG, SCHED,
- "Subport %u TC3 oversubscription is ON (%.4lf < %.4lf)\n",
- subport_id, subport_tc3_rate, s->tc_ov_rate);
+ "Subport %u TC%u oversubscription is ON (%.4lf < %.4lf)\n",
+ subport_id, RTE_SCHED_MAX_TC,
+ subport_low_prio_tc_rate, s->tc_ov_rate);
}
p->tc_ov_period_id = s->tc_ov_period_id;
p->tc_ov_credits = s->tc_ov_wm;
@@ -1085,7 +1144,7 @@ static inline void rte_sched_port_update_subport_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt) {
struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
- uint32_t tc_index = (qindex >> 2) & 0x3;
+ uint32_t tc_index = (qindex >> RTE_SCHED_WRR_SHIFT) &
+RTE_SCHED_TC_MASK;
uint32_t pkt_len = pkt->pkt_len;
s->stats.n_pkts_tc[tc_index] += 1;
@@ -1105,7 +1164,8 @@ rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port, #endif {
struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port));
- uint32_t tc_index = (qindex >> 2) & 0x3;
+ uint32_t tc_index = (qindex >> RTE_SCHED_WRR_SHIFT) &
+RTE_SCHED_TC_MASK;
+
uint32_t pkt_len = pkt->pkt_len;
s->stats.n_pkts_tc_dropped[tc_index] += 1; @@ -1160,7 +1220,7 @@ rte_sched_port_red_drop(struct rte_sched_port *port, struct rte_mbuf *pkt, uint3
uint32_t tc_index;
enum rte_meter_color color;
- tc_index = (qindex >> 2) & 0x3;
+ tc_index = (qindex >> RTE_SCHED_WRR_SHIFT) & RTE_SCHED_TC_MASK;
color = rte_sched_port_pkt_read_color(pkt);
red_cfg = &port->red_config[tc_index][color];
@@ -1480,6 +1540,7 @@ grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
struct rte_sched_pipe *pipe = grinder->pipe;
struct rte_sched_pipe_profile *params = grinder->pipe_params;
uint64_t n_periods;
+ uint32_t tc;
/* Subport TB */
n_periods = (port->time - subport->tb_time) / subport->tb_period; @@ -1495,19 +1556,19 @@ grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
/* Subport TCs */
if (unlikely(port->time >= subport->tc_time)) {
- subport->tc_credits[0] = subport->tc_credits_per_period[0];
- subport->tc_credits[1] = subport->tc_credits_per_period[1];
- subport->tc_credits[2] = subport->tc_credits_per_period[2];
- subport->tc_credits[3] = subport->tc_credits_per_period[3];
+ for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
+ subport->tc_credits[tc] =
+ subport->tc_credits_per_period[tc];
+ }
subport->tc_time = port->time + subport->tc_period;
}
/* Pipe TCs */
if (unlikely(port->time >= pipe->tc_time)) {
- pipe->tc_credits[0] = params->tc_credits_per_period[0];
- pipe->tc_credits[1] = params->tc_credits_per_period[1];
- pipe->tc_credits[2] = params->tc_credits_per_period[2];
- pipe->tc_credits[3] = params->tc_credits_per_period[3];
+ for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
+ pipe->tc_credits[tc] =
+ params->tc_credits_per_period[tc];
+ }
pipe->tc_time = port->time + params->tc_period;
}
}
@@ -1522,19 +1583,24 @@ grinder_tc_ov_credits_update(struct rte_sched_port *port, uint32_t pos)
uint32_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
uint32_t tc_ov_consumption_max;
uint32_t tc_ov_wm = subport->tc_ov_wm;
+ uint32_t consumption = 0;
+ uint32_t tc;
if (subport->tc_ov == 0)
return subport->tc_ov_wm_max;
- tc_ov_consumption[0] = subport->tc_credits_per_period[0] - subport->tc_credits[0];
- tc_ov_consumption[1] = subport->tc_credits_per_period[1] - subport->tc_credits[1];
- tc_ov_consumption[2] = subport->tc_credits_per_period[2] - subport->tc_credits[2];
- tc_ov_consumption[3] = subport->tc_credits_per_period[3] - subport->tc_credits[3];
+ for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
+ tc_ov_consumption[tc] = subport->tc_credits_per_period[tc]
+ - subport->tc_credits[tc];
+ if (tc < RTE_SCHED_MAX_TC)
+ consumption += tc_ov_consumption[tc];
+ }
- tc_ov_consumption_max = subport->tc_credits_per_period[3] -
- (tc_ov_consumption[0] + tc_ov_consumption[1] + tc_ov_consumption[2]);
+ tc_ov_consumption_max =
+ subport->tc_credits_per_period[RTE_SCHED_MAX_TC] - consumption;
- if (tc_ov_consumption[3] > (tc_ov_consumption_max - port->mtu)) {
+ if (tc_ov_consumption[RTE_SCHED_MAX_TC] >
+ (tc_ov_consumption_max - port->mtu)) {
tc_ov_wm -= tc_ov_wm >> 7;
if (tc_ov_wm < subport->tc_ov_wm_min)
tc_ov_wm = subport->tc_ov_wm_min;
@@ -1574,10 +1640,9 @@ grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
if (unlikely(port->time >= subport->tc_time)) {
subport->tc_ov_wm = grinder_tc_ov_credits_update(port, pos);
- subport->tc_credits[0] = subport->tc_credits_per_period[0];
- subport->tc_credits[1] = subport->tc_credits_per_period[1];
- subport->tc_credits[2] = subport->tc_credits_per_period[2];
- subport->tc_credits[3] = subport->tc_credits_per_period[3];
+ for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++)
+ subport->tc_credits[tc] =
+ subport->tc_credits_per_period[tc];
subport->tc_time = port->time + subport->tc_period;
subport->tc_ov_period_id++;
@@ -1585,10 +1650,10 @@ grinder_credits_update(struct rte_sched_port *port, uint32_t pos)
/* Pipe TCs */
if (unlikely(port->time >= pipe->tc_time)) {
- pipe->tc_credits[0] = params->tc_credits_per_period[0];
- pipe->tc_credits[1] = params->tc_credits_per_period[1];
- pipe->tc_credits[2] = params->tc_credits_per_period[2];
- pipe->tc_credits[3] = params->tc_credits_per_period[3];
+ for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) {
+ pipe->tc_credits[tc] =
+ params->tc_credits_per_period[tc];
+
pipe->tc_time = port->time + params->tc_period;
}
@@ -1840,6 +1905,7 @@ grinder_next_tc(struct rte_sched_port *port, uint32_t pos)
struct rte_mbuf **qbase;
uint32_t qindex;
uint16_t qsize;
+ uint32_t q;
if (grinder->tccache_r == grinder->tccache_w)
return 0;
@@ -1848,24 +1914,16 @@ grinder_next_tc(struct rte_sched_port *port, uint32_t pos)
qbase = rte_sched_port_qbase(port, qindex);
qsize = rte_sched_port_qsize(port, qindex);
- grinder->tc_index = (qindex >> 2) & 0x3;
+ grinder->tc_index = (qindex >> RTE_SCHED_WRR_SHIFT) &
+RTE_SCHED_TC_MASK;
+
grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
grinder->qsize = qsize;
- grinder->qindex[0] = qindex;
- grinder->qindex[1] = qindex + 1;
- grinder->qindex[2] = qindex + 2;
- grinder->qindex[3] = qindex + 3;
-
- grinder->queue[0] = port->queue + qindex;
- grinder->queue[1] = port->queue + qindex + 1;
- grinder->queue[2] = port->queue + qindex + 2;
- grinder->queue[3] = port->queue + qindex + 3;
-
- grinder->qbase[0] = qbase;
- grinder->qbase[1] = qbase + qsize;
- grinder->qbase[2] = qbase + 2 * qsize;
- grinder->qbase[3] = qbase + 3 * qsize;
+ for (q = 0; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; q++) {
+ grinder->qindex[q] = qindex + q;
+ grinder->queue[q] = port->queue + qindex + q;
+ grinder->qbase[q] = qbase + (q * qsize);
+ }
grinder->tccache_r++;
return 1;
@@ -1910,7 +1968,8 @@ grinder_next_pipe(struct rte_sched_port *port, uint32_t pos)
}
/* Install new pipe in the grinder */
- grinder->pindex = pipe_qindex >> 4;
+ grinder->pindex = pipe_qindex >> (RTE_SCHED_TC_SHIFT +
+ RTE_SCHED_WRR_SHIFT);
grinder->subport = port->subport + (grinder->pindex / port->n_pipes_per_subport);
grinder->pipe = port->pipe + grinder->pindex;
grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */ @@ -1938,23 +1997,18 @@ grinder_wrr_load(struct rte_sched_port *port, uint32_t pos)
uint32_t tc_index = grinder->tc_index;
uint32_t qmask = grinder->qmask;
uint32_t qindex;
+ uint32_t q;
+ uint8_t tokens;
- qindex = tc_index * 4;
-
- grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT;
- grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT;
- grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT;
- grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT;
+ qindex = tc_index * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
- grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
- grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
- grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
- grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
-
- grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex];
- grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1];
- grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2];
- grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3];
+ for (q = 0; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; q++) {
+ tokens = ((uint16_t) pipe->wrr_tokens[qindex + q]) <<
+ RTE_SCHED_WRR_SHIFT;
+ grinder->wrr_tokens[q] = tokens;
+ grinder->wrr_mask[q] = ((qmask >> q) & 0x1) * 0xFFFF;
+ grinder->wrr_cost[q] = pipe_params->wrr_cost[qindex + q];
+ }
}
static inline void
@@ -1964,17 +2018,15 @@ grinder_wrr_store(struct rte_sched_port *port, uint32_t pos)
struct rte_sched_pipe *pipe = grinder->pipe;
uint32_t tc_index = grinder->tc_index;
uint32_t qindex;
-
- qindex = tc_index * 4;
-
- pipe->wrr_tokens[qindex] = (grinder->wrr_tokens[0] & grinder->wrr_mask[0])
- >> RTE_SCHED_WRR_SHIFT;
- pipe->wrr_tokens[qindex + 1] = (grinder->wrr_tokens[1] & grinder->wrr_mask[1])
- >> RTE_SCHED_WRR_SHIFT;
- pipe->wrr_tokens[qindex + 2] = (grinder->wrr_tokens[2] & grinder->wrr_mask[2])
- >> RTE_SCHED_WRR_SHIFT;
- pipe->wrr_tokens[qindex + 3] = (grinder->wrr_tokens[3] & grinder->wrr_mask[3])
- >> RTE_SCHED_WRR_SHIFT;
+ uint32_t q;
+ uint8_t tokens;
+
+ qindex = tc_index * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+ for (q = 0; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; q++) {
+ tokens = (grinder->wrr_tokens[q] & grinder->wrr_mask[q]) >>
+ RTE_SCHED_WRR_SHIFT;
+ pipe->wrr_tokens[qindex + q] = tokens;
+ }
}
static inline void
@@ -1982,19 +2034,17 @@ grinder_wrr(struct rte_sched_port *port, uint32_t pos) {
struct rte_sched_grinder *grinder = port->grinder + pos;
uint16_t wrr_tokens_min;
+ uint32_t q;
- grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
- grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
- grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
- grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
+ for (q = 0; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; q++)
+ grinder->wrr_tokens[q] |= ~grinder->wrr_mask[q];
- grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
+ grinder->qpos = rte_min_pos_n_u16(grinder->wrr_tokens,
+ RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS);
wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
- grinder->wrr_tokens[0] -= wrr_tokens_min;
- grinder->wrr_tokens[1] -= wrr_tokens_min;
- grinder->wrr_tokens[2] -= wrr_tokens_min;
- grinder->wrr_tokens[3] -= wrr_tokens_min;
+ for (q = 0; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; q++)
+ grinder->wrr_tokens[q] -= wrr_tokens_min;
}
@@ -2013,13 +2063,12 @@ static inline void grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos) {
struct rte_sched_grinder *grinder = port->grinder + pos;
- uint16_t qsize, qr[4];
+ uint16_t qsize, qr[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS];
+ uint32_t q;
qsize = grinder->qsize;
- qr[0] = grinder->queue[0]->qr & (qsize - 1);
- qr[1] = grinder->queue[1]->qr & (qsize - 1);
- qr[2] = grinder->queue[2]->qr & (qsize - 1);
- qr[3] = grinder->queue[3]->qr & (qsize - 1);
+ for (q = 0; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; q++)
+ qr[q] = grinder->queue[q]->qr & (qsize - 1);
rte_prefetch0(grinder->qbase[0] + qr[0]);
rte_prefetch0(grinder->qbase[1] + qr[1]); @@ -2027,8 +2076,9 @@ grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos)
grinder_wrr_load(port, pos);
grinder_wrr(port, pos);
- rte_prefetch0(grinder->qbase[2] + qr[2]);
- rte_prefetch0(grinder->qbase[3] + qr[3]);
+ for (q = 2; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; q++)
+ rte_prefetch0(grinder->qbase[q] + qr[q]);
+
}
static inline void
diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h index e9c2817..0144b34 100644
--- a/lib/librte_sched/rte_sched.h
+++ b/lib/librte_sched/rte_sched.h
@@ -95,16 +95,31 @@ extern "C" {
#endif
/** Number of traffic classes per pipe (as well as subport).
- * Cannot be changed.
+ * Must be power of two.
*/
-#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE 4
-
-/** Number of queues per pipe traffic class. Cannot be changed. */
-#define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS 4
+#define RTE_SCHED_TC_SHIFT 2
+#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE (1 << RTE_SCHED_TC_SHIFT)
+#define RTE_SCHED_TC_MASK \
+ (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)
+#define RTE_SCHED_MAX_TC \
+ (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)
+
+/** Number of queues per pipe traffic class. Must be power of two. */
+#define RTE_SCHED_WRR_SHIFT 2
+#define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS (1 << RTE_SCHED_WRR_SHIFT)
+#define RTE_SCHED_WRR_MASK \
+ (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1)
+
+/** Combined TC-WRR shift and mask. */
+#define RTE_SCHED_TC_WRR_SHIFT \
+ (RTE_SCHED_TC_SHIFT + RTE_SCHED_WRR_SHIFT)
+
+#define RTE_SCHED_TC_WRR_MASK \
+ ((RTE_SCHED_TC_MASK << RTE_SCHED_TC_SHIFT) | RTE_SCHED_WRR_MASK)
/** Number of queues per pipe. */
#define RTE_SCHED_QUEUES_PER_PIPE \
- (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * \
+ (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * \
RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
/** Maximum number of pipe profiles that can be defined per port.
diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h
index aed144b..9693269 100644
--- a/lib/librte_sched/rte_sched_common.h
+++ b/lib/librte_sched/rte_sched_common.h
@@ -77,6 +77,22 @@ rte_min_pos_4_u16(uint16_t *x)
return pos0;
}
+static inline uint32_t
+rte_min_pos_n_u16(uint16_t *x, uint32_t n) {
+ uint32_t index;
+ uint32_t min_index = 0;
+ uint16_t min_value = UINT16_MAX;
+
+ for (index = 0; index < n; index++) {
+ if (x[index] < min_value) {
+ min_value = x[index];
+ min_index = index;
+ }
+ }
+ return min_index;
+}
+
#endif
/*
--
2.1.4
next prev parent reply other threads:[~2017-10-05 10:19 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-10-05 9:20 alangordondewar
2017-10-05 10:19 ` Robertson, Alan [this message]
2017-10-06 9:12 ` Alan Dewar
2018-02-16 15:44 ` [dpdk-dev] [RFC v2] " alangordondewar
2018-02-16 20:09 ` Stephen Hemminger
2018-02-16 20:10 ` Stephen Hemminger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=051E977EFCB14842A3BEA5680BAFC91B53B3D7@gbcdcmbx03.intl.att.com \
--to=ar771e@intl.att.com \
--cc=alan.dewar@att.com \
--cc=alangordondewar@gmail.com \
--cc=cristian.dumitrescu@intel.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).