From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by dpdk.space (Postfix) with ESMTP id D2DEBA045E for ; Tue, 28 May 2019 14:14:41 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 4327F1B9DA; Tue, 28 May 2019 14:08:46 +0200 (CEST) Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by dpdk.org (Postfix) with ESMTP id EB0C61B9B6 for ; Tue, 28 May 2019 14:08:33 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 28 May 2019 05:08:33 -0700 X-ExtLoop1: 1 Received: from lkrakowx-mobl.ger.corp.intel.com ([10.103.104.99]) by fmsmga001.fm.intel.com with ESMTP; 28 May 2019 05:08:31 -0700 From: Lukasz Krakowiak To: cristian.dumitrescu@intel.com Cc: dev@dpdk.org, Jasvinder Singh , Abraham Tovar , Lukasz Krakowiak Date: Tue, 28 May 2019 14:05:42 +0200 Message-Id: <20190528120553.2992-17-lukaszx.krakowiak@intel.com> X-Mailer: git-send-email 2.19.2.windows.1 In-Reply-To: <20190528120553.2992-1-lukaszx.krakowiak@intel.com> References: <20190528120553.2992-1-lukaszx.krakowiak@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH 16/27] sched: update grinder wrr compute function X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Jasvinder Singh Update weighted round robin function for best-effort traffic class queues of the scheduler to allow configuration flexibility for pipe traffic classes and queues, and subport level configuration of the pipe parameters. 
Signed-off-by: Jasvinder Singh Signed-off-by: Abraham Tovar Signed-off-by: Lukasz Krakowiak --- lib/librte_sched/rte_sched.c | 135 +++++++++++++++++----------- lib/librte_sched/rte_sched_common.h | 41 +++++++++ 2 files changed, 125 insertions(+), 51 deletions(-) diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 07939c04f..a9b5f7bf8 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -2316,73 +2316,106 @@ grinder_next_pipe(struct rte_sched_subport *subport, uint32_t pos) return 1; } - static inline void -grinder_wrr_load(struct rte_sched_port *port, uint32_t pos) +grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params; - uint32_t tc_index = grinder->tc_index; - uint32_t qmask = grinder->qmask; - uint32_t qindex; - - qindex = tc_index * 4; - - grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT; - grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT; - grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT; - grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT; - - grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF; - grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF; - grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF; - grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF; + uint32_t qmask = grinder->be.qmask; + uint32_t qindex = grinder->be.qindex[0]; + uint32_t i; - grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex]; - grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1]; - grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2]; - grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3]; + for (i = 0; i < 
pipe->n_be_queues; i++) { + grinder->be.wrr_tokens[i] = + ((uint16_t) pipe->wrr_tokens[qindex + i]) << RTE_SCHED_WRR_SHIFT; + grinder->be.wrr_mask[i] = ((qmask >> i) & 0x1) * 0xFFFF; + grinder->be.wrr_cost[i] = pipe_params->wrr_cost[qindex + i]; + } } static inline void -grinder_wrr_store(struct rte_sched_port *port, uint32_t pos) +grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; struct rte_sched_pipe *pipe = grinder->pipe; uint32_t tc_index = grinder->tc_index; - uint32_t qindex; - - qindex = tc_index * 4; + uint32_t i; - pipe->wrr_tokens[qindex] = (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) - >> RTE_SCHED_WRR_SHIFT; - pipe->wrr_tokens[qindex + 1] = (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) - >> RTE_SCHED_WRR_SHIFT; - pipe->wrr_tokens[qindex + 2] = (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) - >> RTE_SCHED_WRR_SHIFT; - pipe->wrr_tokens[qindex + 3] = (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) - >> RTE_SCHED_WRR_SHIFT; + if (tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) + for (i = 0; i < pipe->n_be_queues; i++) + pipe->wrr_tokens[i] = + (grinder->be.wrr_tokens[i] & grinder->be.wrr_mask[i]) >> + RTE_SCHED_WRR_SHIFT; } static inline void -grinder_wrr(struct rte_sched_port *port, uint32_t pos) +grinder_wrr(struct rte_sched_subport *subport, uint32_t pos) { - struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_grinder *grinder = subport->grinder + pos; + struct rte_sched_pipe *pipe = grinder->pipe; + uint32_t n_be_queues = pipe->n_be_queues; uint16_t wrr_tokens_min; - grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0]; - grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1]; - grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2]; - grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3]; + if (n_be_queues == 1) { + grinder->be.wrr_tokens[0] |= ~grinder->be.wrr_mask[0]; + grinder->be.qpos = 0; + wrr_tokens_min = 
grinder->be.wrr_tokens[0]; + grinder->be.wrr_tokens[0] -= wrr_tokens_min; + return; + } + + if (n_be_queues == 2) { + grinder->be.wrr_tokens[0] |= ~grinder->be.wrr_mask[0]; + grinder->be.wrr_tokens[1] |= ~grinder->be.wrr_mask[1]; + + grinder->be.qpos = rte_min_pos_2_u16(grinder->be.wrr_tokens); + wrr_tokens_min = grinder->be.wrr_tokens[grinder->be.qpos]; + + grinder->be.wrr_tokens[0] -= wrr_tokens_min; + grinder->be.wrr_tokens[1] -= wrr_tokens_min; + return; + } + + if (n_be_queues == 4) { + grinder->be.wrr_tokens[0] |= ~grinder->be.wrr_mask[0]; + grinder->be.wrr_tokens[1] |= ~grinder->be.wrr_mask[1]; + grinder->be.wrr_tokens[2] |= ~grinder->be.wrr_mask[2]; + grinder->be.wrr_tokens[3] |= ~grinder->be.wrr_mask[3]; + + grinder->be.qpos = rte_min_pos_4_u16(grinder->be.wrr_tokens); + wrr_tokens_min = grinder->be.wrr_tokens[grinder->be.qpos]; - grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens); - wrr_tokens_min = grinder->wrr_tokens[grinder->qpos]; + grinder->be.wrr_tokens[0] -= wrr_tokens_min; + grinder->be.wrr_tokens[1] -= wrr_tokens_min; + grinder->be.wrr_tokens[2] -= wrr_tokens_min; + grinder->be.wrr_tokens[3] -= wrr_tokens_min; + return; + } - grinder->wrr_tokens[0] -= wrr_tokens_min; - grinder->wrr_tokens[1] -= wrr_tokens_min; - grinder->wrr_tokens[2] -= wrr_tokens_min; - grinder->wrr_tokens[3] -= wrr_tokens_min; + if (n_be_queues == 8) { + grinder->be.wrr_tokens[0] |= ~grinder->be.wrr_mask[0]; + grinder->be.wrr_tokens[1] |= ~grinder->be.wrr_mask[1]; + grinder->be.wrr_tokens[2] |= ~grinder->be.wrr_mask[2]; + grinder->be.wrr_tokens[3] |= ~grinder->be.wrr_mask[3]; + grinder->be.wrr_tokens[4] |= ~grinder->be.wrr_mask[4]; + grinder->be.wrr_tokens[5] |= ~grinder->be.wrr_mask[5]; + grinder->be.wrr_tokens[6] |= ~grinder->be.wrr_mask[6]; + grinder->be.wrr_tokens[7] |= ~grinder->be.wrr_mask[7]; + + grinder->be.qpos = rte_min_pos_8_u16(grinder->be.wrr_tokens); + wrr_tokens_min = grinder->be.wrr_tokens[grinder->be.qpos]; + + grinder->be.wrr_tokens[0] -= wrr_tokens_min; 
+ grinder->be.wrr_tokens[1] -= wrr_tokens_min; + grinder->be.wrr_tokens[2] -= wrr_tokens_min; + grinder->be.wrr_tokens[3] -= wrr_tokens_min; + grinder->be.wrr_tokens[4] -= wrr_tokens_min; + grinder->be.wrr_tokens[5] -= wrr_tokens_min; + grinder->be.wrr_tokens[6] -= wrr_tokens_min; + grinder->be.wrr_tokens[7] -= wrr_tokens_min; + return; + } } @@ -2423,8 +2456,8 @@ grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos) rte_prefetch0(grinder->be.qbase[i] + qr[i]); } - grinder_wrr_load(port, pos); - grinder_wrr(port, pos); + grinder_wrr_load(port->subport, pos); + grinder_wrr(port->subport, pos); } static inline void @@ -2493,12 +2526,12 @@ grinder_handle(struct rte_sched_port *port, uint32_t pos) /* Look for next packet within the same TC */ if (result && grinder->qmask) { - grinder_wrr(port, pos); + grinder_wrr(port->subport, pos); grinder_prefetch_mbuf(port, pos); return 1; } - grinder_wrr_store(port, pos); + grinder_wrr_store(port->subport, pos); /* Look for another active TC within same pipe */ if (grinder_next_tc(port->subport, pos)) { diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h index 8c191a9b8..bb3595f26 100644 --- a/lib/librte_sched/rte_sched_common.h +++ b/lib/librte_sched/rte_sched_common.h @@ -20,6 +20,18 @@ rte_sched_min_val_2_u32(uint32_t x, uint32_t y) return (x < y)? 
x : y; } +/* Simplified version to remove branches with CMOV instruction */ +static inline uint32_t +rte_min_pos_2_u16(uint16_t *x) +{ + uint32_t pos0 = 0; + + if (x[1] <= x[0]) + pos0 = 1; + + return pos0; +} + #if 0 static inline uint32_t rte_min_pos_4_u16(uint16_t *x) @@ -50,6 +62,35 @@ rte_min_pos_4_u16(uint16_t *x) #endif +/* Simplified version to remove branches with CMOV instruction */ +static inline uint32_t +rte_min_pos_8_u16(uint16_t *x) +{ + uint32_t pos0 = 0; + uint32_t pos1 = 2; + uint32_t pos2 = 4; + uint32_t pos3 = 6; + + if (x[1] <= x[0]) + pos0 = 1; + if (x[3] <= x[2]) + pos1 = 3; + if (x[5] <= x[4]) + pos2 = 5; + if (x[7] <= x[6]) + pos3 = 7; + + if (x[pos1] <= x[pos0]) + pos0 = pos1; + if (x[pos3] <= x[pos2]) + pos2 = pos3; + + if (x[pos2] <= x[pos0]) + pos0 = pos2; + + return pos0; +} + /* * Compute the Greatest Common Divisor (GCD) of two numbers. * This implementation uses Euclid's algorithm: -- 2.20.1