From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by dpdk.space (Postfix) with ESMTP id 49CF2A046B for ; Tue, 25 Jun 2019 17:35:45 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id DEA831BB86; Tue, 25 Jun 2019 17:32:43 +0200 (CEST) Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by dpdk.org (Postfix) with ESMTP id CA2EB1BB09 for ; Tue, 25 Jun 2019 17:32:22 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga006.jf.intel.com ([10.7.209.51]) by fmsmga105.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 25 Jun 2019 08:32:22 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.63,416,1557212400"; d="scan'208";a="166711566" Received: from silpixa00381635.ir.intel.com (HELO silpixa00381635.ger.corp.intel.com) ([10.237.223.4]) by orsmga006.jf.intel.com with ESMTP; 25 Jun 2019 08:32:21 -0700 From: Jasvinder Singh To: dev@dpdk.org Cc: cristian.dumitrescu@intel.com, Abraham Tovar , Lukasz Krakowiak Date: Tue, 25 Jun 2019 16:32:05 +0100 Message-Id: <20190625153217.24301-17-jasvinder.singh@intel.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20190625153217.24301-1-jasvinder.singh@intel.com> References: <20190528120553.2992-2-lukaszx.krakowiak@intel.com> <20190625153217.24301-1-jasvinder.singh@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH v2 16/28] sched: update grinder wrr compute function X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Update weighted round robin function for best-effort traffic class queues to allow configuration flexibility for pipe traffic classes and queues, and subport level configuration of the pipe parameters. 
Signed-off-by: Jasvinder Singh Signed-off-by: Abraham Tovar Signed-off-by: Lukasz Krakowiak --- lib/librte_sched/rte_sched.c | 111 ++++++++++++++++++---------- lib/librte_sched/rte_sched_common.h | 41 ++++++++++ 2 files changed, 111 insertions(+), 41 deletions(-) diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 00ee9e7a2..90c41e549 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -2416,71 +2416,100 @@ grinder_next_pipe(struct rte_sched_subport *subport, uint32_t pos) static inline void -grinder_wrr_load(struct rte_sched_port *port, uint32_t pos) +grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params; - uint32_t tc_index = grinder->tc_index; uint32_t qmask = grinder->qmask; - uint32_t qindex; - - qindex = tc_index * 4; - - grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT; - grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT; - grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT; - grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT; - - grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF; - grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF; - grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF; - grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF; + uint32_t qindex = grinder->qindex[0]; + uint32_t i; - grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex]; - grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1]; - grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2]; - grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3]; + for (i = 0; i < pipe->n_be_queues; i++) { + grinder->wrr_tokens[i] = + ((uint16_t) 
pipe->wrr_tokens[qindex + i]) << RTE_SCHED_WRR_SHIFT; + grinder->wrr_mask[i] = ((qmask >> i) & 0x1) * 0xFFFF; + grinder->wrr_cost[i] = pipe_params->wrr_cost[qindex + i]; + } } static inline void -grinder_wrr_store(struct rte_sched_port *port, uint32_t pos) +grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; - uint32_t tc_index = grinder->tc_index; - uint32_t qindex; - - qindex = tc_index * 4; + uint32_t i; - pipe->wrr_tokens[qindex] = (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) - >> RTE_SCHED_WRR_SHIFT; - pipe->wrr_tokens[qindex + 1] = (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) - >> RTE_SCHED_WRR_SHIFT; - pipe->wrr_tokens[qindex + 2] = (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) - >> RTE_SCHED_WRR_SHIFT; - pipe->wrr_tokens[qindex + 3] = (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) - >> RTE_SCHED_WRR_SHIFT; + for (i = 0; i < pipe->n_be_queues; i++) + pipe->wrr_tokens[i] = + (grinder->wrr_tokens[i] & grinder->wrr_mask[i]) >> + RTE_SCHED_WRR_SHIFT; } static inline void -grinder_wrr(struct rte_sched_port *port, uint32_t pos) +grinder_wrr(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; + struct rte_sched_pipe *pipe = grinder->pipe; + uint32_t n_be_queues = pipe->n_be_queues; uint16_t wrr_tokens_min; + if (n_be_queues == 1) { + grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0]; + grinder->qpos = 0; + wrr_tokens_min = grinder->wrr_tokens[0]; + grinder->wrr_tokens[0] -= wrr_tokens_min; + return; + } + + if (n_be_queues == 2) { + grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0]; + grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1]; + + grinder->qpos = rte_min_pos_2_u16(grinder->wrr_tokens); + wrr_tokens_min = grinder->wrr_tokens[grinder->qpos]; + + 
grinder->wrr_tokens[0] -= wrr_tokens_min; + grinder->wrr_tokens[1] -= wrr_tokens_min; + return; + } + + if (n_be_queues == 4) { + grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0]; + grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1]; + grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2]; + grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3]; + + grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens); + wrr_tokens_min = grinder->wrr_tokens[grinder->qpos]; + + grinder->wrr_tokens[0] -= wrr_tokens_min; + grinder->wrr_tokens[1] -= wrr_tokens_min; + grinder->wrr_tokens[2] -= wrr_tokens_min; + grinder->wrr_tokens[3] -= wrr_tokens_min; + return; + } + grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0]; grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1]; grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2]; grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3]; + grinder->wrr_tokens[4] |= ~grinder->wrr_mask[4]; + grinder->wrr_tokens[5] |= ~grinder->wrr_mask[5]; + grinder->wrr_tokens[6] |= ~grinder->wrr_mask[6]; + grinder->wrr_tokens[7] |= ~grinder->wrr_mask[7]; - grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens); + grinder->qpos = rte_min_pos_8_u16(grinder->wrr_tokens); wrr_tokens_min = grinder->wrr_tokens[grinder->qpos]; grinder->wrr_tokens[0] -= wrr_tokens_min; grinder->wrr_tokens[1] -= wrr_tokens_min; grinder->wrr_tokens[2] -= wrr_tokens_min; grinder->wrr_tokens[3] -= wrr_tokens_min; + grinder->wrr_tokens[4] -= wrr_tokens_min; + grinder->wrr_tokens[5] -= wrr_tokens_min; + grinder->wrr_tokens[6] -= wrr_tokens_min; + grinder->wrr_tokens[7] -= wrr_tokens_min; } @@ -2522,8 +2551,8 @@ grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos) rte_prefetch0(grinder->qbase[i] + qr[i]); } - grinder_wrr_load(port, pos); - grinder_wrr(port, pos); + grinder_wrr_load(port->subport, pos); + grinder_wrr(port->subport, pos); } static inline void @@ -2592,12 +2621,12 @@ grinder_handle(struct rte_sched_port *port, uint32_t pos) /* Look for next packet within the same TC */ if (result && 
grinder->qmask) { - grinder_wrr(port, pos); + grinder_wrr(port->subport, pos); grinder_prefetch_mbuf(port, pos); return 1; } - grinder_wrr_store(port, pos); + grinder_wrr_store(port->subport, pos); /* Look for another active TC within same pipe */ if (grinder_next_tc(port->subport, pos)) { diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h index 8c191a9b8..bb3595f26 100644 --- a/lib/librte_sched/rte_sched_common.h +++ b/lib/librte_sched/rte_sched_common.h @@ -20,6 +20,18 @@ rte_sched_min_val_2_u32(uint32_t x, uint32_t y) return (x < y)? x : y; } +/* Simplified version to remove branches with CMOV instruction */ +static inline uint32_t +rte_min_pos_2_u16(uint16_t *x) +{ + uint32_t pos0 = 0; + + if (x[1] <= x[0]) + pos0 = 1; + + return pos0; +} + #if 0 static inline uint32_t rte_min_pos_4_u16(uint16_t *x) @@ -50,6 +62,35 @@ rte_min_pos_4_u16(uint16_t *x) #endif +/* Simplified version to remove branches with CMOV instruction */ +static inline uint32_t +rte_min_pos_8_u16(uint16_t *x) +{ + uint32_t pos0 = 0; + uint32_t pos1 = 2; + uint32_t pos2 = 4; + uint32_t pos3 = 6; + + if (x[1] <= x[0]) + pos0 = 1; + if (x[3] <= x[2]) + pos1 = 3; + if (x[5] <= x[4]) + pos2 = 5; + if (x[7] <= x[6]) + pos3 = 7; + + if (x[pos1] <= x[pos0]) + pos0 = pos1; + if (x[pos3] <= x[pos2]) + pos2 = pos3; + + if (x[pos2] <= x[pos0]) + pos0 = pos2; + + return pos0; +} + /* * Compute the Greatest Common Divisor (GCD) of two numbers. * This implementation uses Euclid's algorithm: -- 2.21.0