From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <tomaszx.kulasek@intel.com>
Received: from mga06.intel.com (mga06.intel.com [134.134.136.31])
 by dpdk.org (Postfix) with ESMTP id 12F2210D50
 for <dev@dpdk.org>; Thu, 22 Dec 2016 14:05:52 +0100 (CET)
Received: from orsmga002.jf.intel.com ([10.7.209.21])
 by orsmga104.jf.intel.com with ESMTP; 22 Dec 2016 05:05:52 -0800
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.33,388,1477983600"; d="scan'208";a="21641717"
Received: from unknown (HELO Sent) ([10.103.102.79])
 by orsmga002.jf.intel.com with SMTP; 22 Dec 2016 05:05:50 -0800
Received: by Sent (sSMTP sendmail emulation); Thu, 22 Dec 2016 14:05:49 +0100
From: Tomasz Kulasek <tomaszx.kulasek@intel.com>
To: dev@dpdk.org
Date: Thu, 22 Dec 2016 14:05:19 +0100
Message-Id: <1482411919-7620-9-git-send-email-tomaszx.kulasek@intel.com>
X-Mailer: git-send-email 2.1.4
In-Reply-To: <1482411919-7620-1-git-send-email-tomaszx.kulasek@intel.com>
References: <1481650914-40324-1-git-send-email-tomaszx.kulasek@intel.com>
 <1482411919-7620-1-git-send-email-tomaszx.kulasek@intel.com>
Subject: [dpdk-dev] [PATCH v14 8/8] testpmd: use Tx preparation in csum
	engine
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Thu, 22 Dec 2016 13:05:53 -0000

Since all current drivers support the Tx preparation API, it is used
in the csum forwarding engine by default for all drivers.

Adding this additional step to the csum engine results in a performance
drop of about 3-4% on my setup with the ixgbe driver. The drop is caused
mostly by the need to re-access and modify the packet data.

Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
 app/test-pmd/csumonly.c |   37 ++++++++++++++++---------------------
 app/test-pmd/testpmd.c  |    5 +++++
 app/test-pmd/testpmd.h  |    2 ++
 3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..806f957 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -112,15 +112,6 @@ struct simple_gre_hdr {
 } __attribute__((__packed__));
 
 static uint16_t
-get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
-{
-	if (ethertype == _htons(ETHER_TYPE_IPv4))
-		return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
-	else /* assume ethertype == ETHER_TYPE_IPv6 */
-		return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
-}
-
-static uint16_t
 get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype)
 {
 	if (ethertype == _htons(ETHER_TYPE_IPv4))
@@ -370,11 +361,9 @@ struct simple_gre_hdr {
 		/* do not recalculate udp cksum if it was 0 */
 		if (udp_hdr->dgram_cksum != 0) {
 			udp_hdr->dgram_cksum = 0;
-			if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) {
+			if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM)
 				ol_flags |= PKT_TX_UDP_CKSUM;
-				udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
-					info->ethertype, ol_flags);
-			} else {
+			else {
 				udp_hdr->dgram_cksum =
 					get_udptcp_checksum(l3_hdr, udp_hdr,
 						info->ethertype);
@@ -383,15 +372,11 @@ struct simple_gre_hdr {
 	} else if (info->l4_proto == IPPROTO_TCP) {
 		tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
 		tcp_hdr->cksum = 0;
-		if (tso_segsz) {
+		if (tso_segsz)
 			ol_flags |= PKT_TX_TCP_SEG;
-			tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-				ol_flags);
-		} else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) {
+		else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM)
 			ol_flags |= PKT_TX_TCP_CKSUM;
-			tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype,
-				ol_flags);
-		} else {
+		else {
 			tcp_hdr->cksum =
 				get_udptcp_checksum(l3_hdr, tcp_hdr,
 					info->ethertype);
@@ -648,6 +633,7 @@ struct simple_gre_hdr {
 	void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */
 	uint16_t nb_rx;
 	uint16_t nb_tx;
+	uint16_t nb_prep;
 	uint16_t i;
 	uint64_t rx_ol_flags, tx_ol_flags;
 	uint16_t testpmd_ol_flags;
@@ -857,7 +843,16 @@ struct simple_gre_hdr {
 			printf("\n");
 		}
 	}
-	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx);
+
+	nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue,
+			pkts_burst, nb_rx);
+	if (nb_prep != nb_rx)
+		printf("Preparing packet burst to transmit failed: %s\n",
+				rte_strerror(rte_errno));
+
+	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
+			nb_prep);
+
 	/*
 	 * Retry if necessary
 	 */
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index a0332c2..634f10b 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -180,6 +180,11 @@ struct fwd_engine * fwd_engines[] = {
 enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
 /**< Split policy for packets to TX. */
 
+/*
+ * Enable Tx preparation path in the "csum" engine.
+ */
+uint8_t tx_prepare;
+
 uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */
 uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */
 
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 9c1e703..488a6e1 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -383,6 +383,8 @@ enum tx_pkt_split {
 
 extern enum tx_pkt_split tx_pkt_split;
 
+extern uint8_t tx_prepare;
+
 extern uint16_t nb_pkt_per_burst;
 extern uint16_t mb_mempool_cache;
 extern int8_t rx_pthresh;
-- 
1.7.9.5