From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wm0-f51.google.com (mail-wm0-f51.google.com [74.125.82.51]) by dpdk.org (Postfix) with ESMTP id 382012BEF for ; Thu, 24 Nov 2016 17:04:03 +0100 (CET) Received: by mail-wm0-f51.google.com with SMTP id t79so66078534wmt.0 for ; Thu, 24 Nov 2016 08:04:03 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=6wind-com.20150623.gappssmtp.com; s=20150623; h=from:to:cc:subject:date:message-id:in-reply-to:references :in-reply-to:references; bh=/X1WR7n1vNmsIthnGA/BKfPnQjFmapCVYYMT8MzEpVM=; b=HD10RUoufSOJYKsq1vUZEF47XOO1VFEKyiOkdvPeFQQZP/hoMyOjL/KcjfRvR+KbGL ZyvgsbDZ3UTlkD08thGhDkCdD/zNLS4XRUJDV0rqPvCW7IJrr7yBlL2xxaHGvq+eFMcY hvdM7VaNCjkgXQKYcf+CbU4vePtlEpOl0uEjtlM4wVm2W/O4e0WJn68ruris8UwauhB3 9aI1XTdwcAXIXDQw/zG+rmyQgm8dkon88ew2AE1/2jSV/yYXdInETo6pJjYrnNrNVB/S O5BKTXG1e3bgqgMF3GzgTd0SuWiE0627xTyKy0FBZadHiqsRD69z1P7zly2K7DmyLzPL 30nQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:in-reply-to:references; bh=/X1WR7n1vNmsIthnGA/BKfPnQjFmapCVYYMT8MzEpVM=; b=ZgSG5HyOffEDJW38KnixVL8wAx6Q6quO6DTxxmFn+OrxggYRZB6uPNz2Dk+5lmJi+9 iEZZMWxzXRVgSzjSHvjmyMkka/32fLGKjyEolLapYjpkCdXI8c1i/gPv5YDb7qjLNciM SxyEb2PCaB/MZ4Ik3hvqfendpT6tn6SZ07YtsNkqJEeVCW+QRp+KBqfv1aTQbtlPmhnr ySldXBkbwDQV42MYFNF+n7PkkPBV29EtvUFYepKcfCt+D1wljEYxDNTCuYIemaw5RKj1 ZADkVeDjZOupSGZiMwYb08FOmcPldpUOy7OlevHQcWulcRMJgyfNU5cSRO6SJaM0z8HK qVeA== X-Gm-Message-State: AKaTC02i+qR2xy2QzWKICclTfL4F0TUzb3HUZsev2PonEGR28MbfsvHD9dC74t2i6v23ov20 X-Received: by 10.28.22.193 with SMTP id 184mr3087538wmw.100.1480003442826; Thu, 24 Nov 2016 08:04:02 -0800 (PST) Received: from ping.vm.6wind.com (guy78-3-82-239-227-177.fbx.proxad.net. 
[82.239.227.177]) by smtp.gmail.com with ESMTPSA id vr9sm42495142wjc.35.2016.11.24.08.04.01 (version=TLS1_2 cipher=ECDHE-RSA-AES128-SHA bits=128/128); Thu, 24 Nov 2016 08:04:02 -0800 (PST) From: Nelio Laranjeiro To: dev@dpdk.org Cc: Thomas Monjalon , Adrien Mazarguil Date: Thu, 24 Nov 2016 17:03:32 +0100 Message-Id: X-Mailer: git-send-email 2.1.4 In-Reply-To: References: In-Reply-To: References: Subject: [dpdk-dev] [PATCH 3/7] net/mlx5: use vector types to speed up processing X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 24 Nov 2016 16:04:03 -0000 Let the compiler automatically use the vector capabilities of the target machine to optimize instructions. Signed-off-by: Nelio Laranjeiro Acked-by: Adrien Mazarguil --- drivers/net/mlx5/mlx5_prm.h | 7 +++++ drivers/net/mlx5/mlx5_rxtx.c | 74 +++++++++++++++++++++++--------------------- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h index 3dd4cbe..9cd9fdf 100644 --- a/drivers/net/mlx5/mlx5_prm.h +++ b/drivers/net/mlx5/mlx5_prm.h @@ -44,6 +44,7 @@ #pragma GCC diagnostic error "-Wpedantic" #endif +#include <rte_vect.h> #include "mlx5_autoconf.h" /* Get CQE owner bit. */ @@ -134,6 +135,12 @@ struct mlx5_wqe { struct mlx5_wqe_eth_seg_small eseg; }; +/* Vectorize WQE header. */ +struct mlx5_wqe_v { + rte_v128u32_t ctrl; + rte_v128u32_t eseg; +}; + /* WQE. 
*/ struct mlx5_wqe64 { struct mlx5_wqe hdr; diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index ada8e74..e161cd9 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -371,7 +371,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int j = 0; unsigned int max; unsigned int comp; - volatile struct mlx5_wqe *wqe = NULL; + volatile struct mlx5_wqe_v *wqe = NULL; unsigned int segs_n = 0; struct rte_mbuf *buf = NULL; uint8_t *raw; @@ -388,12 +388,13 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (max > elts_n) max -= elts_n; do { - volatile struct mlx5_wqe_data_seg *dseg = NULL; + volatile rte_v128u32_t *dseg = NULL; uint32_t length; unsigned int ds = 0; uintptr_t addr; uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE; uint8_t ehdr[2]; + uint8_t cs_flags = 0; #ifdef MLX5_PMD_SOFT_COUNTERS uint32_t total_length = 0; #endif @@ -412,7 +413,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) --segs_n; if (!segs_n) --pkts_n; - wqe = (volatile struct mlx5_wqe *) + wqe = (volatile struct mlx5_wqe_v *) tx_mlx5_wqe(txq, txq->wqe_ci); rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); if (pkts_n > 1) @@ -438,11 +439,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - wqe->eseg.cs_flags = - MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - } else { - wqe->eseg.cs_flags = 0; + cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; } raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE; /* @@ -498,12 +495,11 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) */ ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2); if (length > 0) { - dseg = (volatile struct mlx5_wqe_data_seg *) + dseg = (volatile rte_v128u32_t *) ((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE)); if ((uintptr_t)dseg >= end) - 
dseg = (volatile struct - mlx5_wqe_data_seg *) + dseg = (volatile rte_v128u32_t *) txq->wqes; goto use_dseg; } else if (!segs_n) { @@ -516,16 +512,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) * No inline has been done in the packet, only the * Ethernet Header as been stored. */ - wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE); - dseg = (volatile struct mlx5_wqe_data_seg *) + dseg = (volatile rte_v128u32_t *) ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE)); ds = 3; use_dseg: /* Add the remaining packet as a simple ds. */ - *dseg = (volatile struct mlx5_wqe_data_seg) { - .addr = htonll(addr), - .byte_count = htonl(length), - .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + addr = htonll(addr); + *dseg = (rte_v128u32_t){ + htonl(length), + txq_mp2mr(txq, txq_mb2mp(buf)), + addr, + addr >> 32, }; ++ds; if (!segs_n) @@ -545,7 +542,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) & ((1 << txq->wqe_n) - 1); - dseg = (volatile struct mlx5_wqe_data_seg *) + dseg = (volatile rte_v128u32_t *) tx_mlx5_wqe(txq, n); rte_prefetch0(tx_mlx5_wqe(txq, n + 1)); } else { @@ -559,10 +556,12 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) total_length += length; #endif /* Store segment information. */ - *dseg = (volatile struct mlx5_wqe_data_seg) { - .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)), - .byte_count = htonl(length), - .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); + *dseg = (rte_v128u32_t){ + htonl(length), + txq_mp2mr(txq, txq_mb2mp(buf)), + addr, + addr >> 32, }; (*txq->elts)[elts_head] = buf; elts_head = (elts_head + 1) & (elts_n - 1); @@ -575,17 +574,19 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) next_pkt: ++i; /* Initialize known and common part of the WQE structure. 
*/ - wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->ctrl[1] = htonl(txq->qp_num_8s | ds); - wqe->ctrl[2] = 0; - wqe->ctrl[3] = 0; - wqe->eseg.rsvd0 = 0; - wqe->eseg.rsvd1 = 0; - wqe->eseg.mss = 0; - wqe->eseg.rsvd2 = 0; - wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz); - wqe->eseg.inline_hdr[0] = ehdr[0]; - wqe->eseg.inline_hdr[1] = ehdr[1]; + wqe->ctrl = (rte_v128u32_t){ + htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND), + htonl(txq->qp_num_8s | ds), + 0, + 0, + }; + wqe->eseg = (rte_v128u32_t){ + 0, + cs_flags, + 0, + (ehdr[1] << 24) | (ehdr[0] << 16) | + htons(pkt_inline_sz), + }; txq->wqe_ci += (ds + 3) / 4; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ @@ -598,10 +599,13 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Check whether completion threshold has been reached. */ comp = txq->elts_comp + i + j; if (comp >= MLX5_TX_COMP_THRESH) { + volatile struct mlx5_wqe_ctrl *w = + (volatile struct mlx5_wqe_ctrl *)wqe; + /* Request completion on last WQE. */ - wqe->ctrl[2] = htonl(8); + w->ctrl2 = htonl(8); /* Save elts_head in unused "immediate" field of WQE. */ - wqe->ctrl[3] = elts_head; + w->ctrl3 = elts_head; txq->elts_comp = 0; } else { txq->elts_comp = comp; -- 2.1.4