From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <johndale@cisco.com>
Received: from alln-iport-8.cisco.com (alln-iport-8.cisco.com [173.37.142.95])
 by dpdk.org (Postfix) with ESMTP id 866DB5A5E
 for <dev@dpdk.org>; Tue, 24 May 2016 08:33:03 +0200 (CEST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple;
 d=cisco.com; i=@cisco.com; l=6743; q=dns/txt; s=iport;
 t=1464071583; x=1465281183;
 h=from:to:cc:subject:date:message-id:in-reply-to: references;
 bh=bwq2e7sp9dvFYPOdT7oWRUrTSf3t9jmIOgst/OSz+S0=;
 b=jdr1YOxt8xP6kd/RyKhHsINM9PzZKUNU47b5bo3VMXI/CIC2t19oIwOt
 MyaMgm3bqkaqCPPbIq08kammWTjPlMGeJRbRvZDofep1GYI79LjFunnpo
 uQGSW0VGXNrlF+0Z8HPDvuumIxsfnaCrsPU6fz/xr/eTpDYgovblG5nMj 0=;
X-IronPort-AV: E=Sophos;i="5.26,359,1459814400"; d="scan'208";a="276977655"
Received: from rcdn-core-11.cisco.com ([173.37.93.147])
 by alln-iport-8.cisco.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;
 24 May 2016 06:33:02 +0000
Received: from cisco.com (savbu-usnic-a.cisco.com [10.193.184.48])
 by rcdn-core-11.cisco.com (8.14.5/8.14.5) with ESMTP id u4O6X2Kv001297;
 Tue, 24 May 2016 06:33:02 GMT
Received: by cisco.com (Postfix, from userid 392789)
 id 821A73FAAE25; Mon, 23 May 2016 23:33:02 -0700 (PDT)
From: John Daley <johndale@cisco.com>
To: dev@dpdk.org
Cc: John Daley <johndale@cisco.com>, Nelson Escobar <neescoba@cisco.com>
Date: Mon, 23 May 2016 23:32:57 -0700
Message-Id: <1464071579-30072-10-git-send-email-johndale@cisco.com>
X-Mailer: git-send-email 2.7.0
In-Reply-To: <1464071579-30072-1-git-send-email-johndale@cisco.com>
References: <1464071579-30072-1-git-send-email-johndale@cisco.com>
Subject: [dpdk-dev] [PATCH v2 09/11] enic: optimize the Tx function
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Tue, 24 May 2016 06:33:04 -0000

Reduce host CPU overhead of Tx packet processing:
* Use local variables inside per packet loop instead of fields in structs.
* Factor book keeping and conditionals out of the per packet loop where
  possible.
* Post buffers to the nic at a maximum of every 64 packets

Signed-off-by: Nelson Escobar <neescoba@cisco.com>
Signed-off-by: John Daley <johndale@cisco.com>
---
 drivers/net/enic/base/vnic_wq.h |   1 +
 drivers/net/enic/enic_res.h     |   2 +-
 drivers/net/enic/enic_rxtx.c    | 167 +++++++++++++++++++---------------------
 3 files changed, 83 insertions(+), 87 deletions(-)

diff --git a/drivers/net/enic/base/vnic_wq.h b/drivers/net/enic/base/vnic_wq.h
index 689b81c..7a66813 100644
--- a/drivers/net/enic/base/vnic_wq.h
+++ b/drivers/net/enic/base/vnic_wq.h
@@ -67,6 +67,7 @@ struct vnic_wq_ctrl {
 
 /* 16 bytes */
 struct vnic_wq_buf {
+	struct rte_mempool *pool;
 	void *mb;
 };
 
diff --git a/drivers/net/enic/enic_res.h b/drivers/net/enic/enic_res.h
index 955db71..3c8e303 100644
--- a/drivers/net/enic/enic_res.h
+++ b/drivers/net/enic/enic_res.h
@@ -53,7 +53,7 @@
 
 #define ENIC_NON_TSO_MAX_DESC		16
 #define ENIC_DEFAULT_RX_FREE_THRESH	32
-#define ENIC_TX_POST_THRESH		(ENIC_MIN_WQ_DESCS / 2)
+#define ENIC_TX_XMIT_MAX		64
 
 #define ENIC_SETTING(enic, f) ((enic->config.flags & VENETF_##f) ? 1 : 0)
 
diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c
index ec8d90a..ba15670 100644
--- a/drivers/net/enic/enic_rxtx.c
+++ b/drivers/net/enic/enic_rxtx.c
@@ -374,114 +374,109 @@ unsigned int enic_cleanup_wq(__rte_unused struct enic *enic, struct vnic_wq *wq)
 	return 0;
 }
 
-void enic_post_wq_index(struct vnic_wq *wq)
-{
-	enic_vnic_post_wq_index(wq);
-}
-
-void enic_send_pkt(struct enic *enic, struct vnic_wq *wq,
-		   struct rte_mbuf *tx_pkt, unsigned short len,
-		   uint8_t sop, uint8_t eop, uint8_t cq_entry,
-		   uint16_t ol_flags, uint16_t vlan_tag)
-{
-	struct wq_enet_desc *desc, *descs;
-	uint16_t mss = 0;
-	uint8_t vlan_tag_insert = 0;
-	uint64_t bus_addr = (dma_addr_t)
-	    (tx_pkt->buf_physaddr + tx_pkt->data_off);
-
-	descs = (struct wq_enet_desc *)wq->ring.descs;
-	desc = descs + wq->head_idx;
-
-	if (sop) {
-		if (ol_flags & PKT_TX_VLAN_PKT)
-			vlan_tag_insert = 1;
-
-		if (enic->hw_ip_checksum) {
-			if (ol_flags & PKT_TX_IP_CKSUM)
-				mss |= ENIC_CALC_IP_CKSUM;
-
-			if (ol_flags & PKT_TX_TCP_UDP_CKSUM)
-				mss |= ENIC_CALC_TCP_UDP_CKSUM;
-		}
-	}
-
-	wq_enet_desc_enc(desc,
-		bus_addr,
-		len,
-		mss,
-		0 /* header_length */,
-		0 /* offload_mode WQ_ENET_OFFLOAD_MODE_CSUM */,
-		eop,
-		cq_entry,
-		0 /* fcoe_encap */,
-		vlan_tag_insert,
-		vlan_tag,
-		0 /* loopback */);
-
-	enic_vnic_post_wq(wq, (void *)tx_pkt, cq_entry);
-}
-
 uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t nb_pkts)
 {
 	uint16_t index;
-	unsigned int frags;
-	unsigned int pkt_len;
-	unsigned int seg_len;
-	unsigned int inc_len;
+	unsigned int pkt_len, data_len;
 	unsigned int nb_segs;
-	struct rte_mbuf *tx_pkt, *next_tx_pkt;
+	struct rte_mbuf *tx_pkt;
 	struct vnic_wq *wq = (struct vnic_wq *)tx_queue;
 	struct enic *enic = vnic_dev_priv(wq->vdev);
 	unsigned short vlan_id;
 	unsigned short ol_flags;
-	uint8_t last_seg, eop;
-	unsigned int host_tx_descs = 0;
+	unsigned int wq_desc_avail;
+	int head_idx;
+	struct vnic_wq_buf *buf;
+	unsigned int hw_ip_cksum_enabled;
+	unsigned int desc_count;
+	struct wq_enet_desc *descs, *desc_p, desc_tmp;
+	uint16_t mss;
+	uint8_t vlan_tag_insert;
+	uint8_t eop;
+	uint64_t bus_addr;
 
+	enic_cleanup_wq(enic, wq);
+	wq_desc_avail = vnic_wq_desc_avail(wq);
+	head_idx = wq->head_idx;
+	desc_count = wq->ring.desc_count;
+
+	nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX);
+
+	hw_ip_cksum_enabled = enic->hw_ip_checksum;
 	for (index = 0; index < nb_pkts; index++) {
 		tx_pkt = *tx_pkts++;
-		inc_len = 0;
 		nb_segs = tx_pkt->nb_segs;
-		if (nb_segs > vnic_wq_desc_avail(wq)) {
+		if (nb_segs > wq_desc_avail) {
 			if (index > 0)
-				enic_post_wq_index(wq);
-
-			/* wq cleanup and try again */
-			if (!enic_cleanup_wq(enic, wq) ||
-				(nb_segs > vnic_wq_desc_avail(wq))) {
-				return index;
-			}
+				goto post;
+			goto done;
 		}
 
 		pkt_len = tx_pkt->pkt_len;
+		data_len = tx_pkt->data_len;
 		vlan_id = tx_pkt->vlan_tci;
 		ol_flags = tx_pkt->ol_flags;
-		for (frags = 0; inc_len < pkt_len; frags++) {
-			if (!tx_pkt)
-				break;
-			next_tx_pkt = tx_pkt->next;
-			seg_len = tx_pkt->data_len;
-			inc_len += seg_len;
-
-			host_tx_descs++;
-			last_seg = 0;
-			eop = 0;
-			if ((pkt_len == inc_len) || !next_tx_pkt) {
-				eop = 1;
-				/* post if last packet in batch or > thresh */
-				if ((index == (nb_pkts - 1)) ||
-				   (host_tx_descs > ENIC_TX_POST_THRESH)) {
-					last_seg = 1;
-					host_tx_descs = 0;
-				}
+
+		mss = 0;
+		vlan_tag_insert = 0;
+		bus_addr = (dma_addr_t)
+			   (tx_pkt->buf_physaddr + tx_pkt->data_off);
+
+		descs = (struct wq_enet_desc *)wq->ring.descs;
+		desc_p = descs + head_idx;
+
+		eop = (data_len == pkt_len);
+
+		if (ol_flags & PKT_TX_VLAN_PKT)
+			vlan_tag_insert = 1;
+
+		if (hw_ip_cksum_enabled && (ol_flags & PKT_TX_IP_CKSUM))
+			mss |= ENIC_CALC_IP_CKSUM;
+
+		if (hw_ip_cksum_enabled && (ol_flags & PKT_TX_TCP_UDP_CKSUM))
+			mss |= ENIC_CALC_TCP_UDP_CKSUM;
+
+		wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, 0, 0, eop,
+				 eop, 0, vlan_tag_insert, vlan_id, 0);
+
+		*desc_p = desc_tmp;
+		buf = &wq->bufs[head_idx];
+		buf->mb = (void *)tx_pkt;
+		head_idx = enic_ring_incr(desc_count, head_idx);
+		wq_desc_avail--;
+
+		if (!eop) {
+			for (tx_pkt = tx_pkt->next; tx_pkt; tx_pkt =
+			    tx_pkt->next) {
+				data_len = tx_pkt->data_len;
+
+				if (tx_pkt->next == NULL)
+					eop = 1;
+				desc_p = descs + head_idx;
+				bus_addr = (dma_addr_t)(tx_pkt->buf_physaddr
+					   + tx_pkt->data_off);
+				wq_enet_desc_enc((struct wq_enet_desc *)
+						 &desc_tmp, bus_addr, data_len,
+						 mss, 0, 0, eop, eop, 0,
+						 vlan_tag_insert, vlan_id, 0);
+
+				*desc_p = desc_tmp;
+				buf = &wq->bufs[head_idx];
+				buf->mb = (void *)tx_pkt;
+				head_idx = enic_ring_incr(desc_count, head_idx);
+				wq_desc_avail--;
 			}
-			enic_send_pkt(enic, wq, tx_pkt, (unsigned short)seg_len,
-				      !frags, eop, last_seg, ol_flags, vlan_id);
-			tx_pkt = next_tx_pkt;
 		}
 	}
+ post:
+	rte_wmb();
+	iowrite32(head_idx, &wq->ctrl->posted_index);
+ done:
+	wq->ring.desc_avail = wq_desc_avail;
+	wq->head_idx = head_idx;
 
-	enic_cleanup_wq(enic, wq);
 	return index;
 }
+
+
-- 
2.7.0