From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
To: dev@dpdk.org
Cc: Felix Marti <felix@chelsio.com>,
	Kumar Sanghvi <kumaras@chelsio.com>,
	Nirranjan Kirubaharan <nirranjan@chelsio.com>
Subject: [dpdk-dev] [PATCH 1/3] cxgbe: Fix RX performance for cxgbe PMD.
Date: Tue,  7 Jul 2015 22:42:28 +0530
Message-ID: <96172d6f608d59f6d8463d407f21d08a963d6d5a.1436288467.git.rahul.lakkireddy@chelsio.com>
In-Reply-To: <cover.1436288467.git.rahul.lakkireddy@chelsio.com>

The CXGBE PMD Rx path allocates a new mbuf for every received packet, which
can lead to a performance hit.  Instead, allocate mbufs in bulk and re-use
them.

Also, simplify the overall Rx handler and update its logic to fix Rx
performance: ring the free-list doorbell in larger batches, coalesce GTS
updates to once every 64 processed response-queue entries, and raise the
free-list fetch burst minimum from 64B to 128B.
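
For reference, a minimal sketch of the bulk-refill pattern this patch
adopts (the fill_ring() helper and the NB_BUFS constant are illustrative
and not part of the patch; rte_mempool_get_bulk() and the mbuf fields
match what the diff below uses):

#include <stdint.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#define NB_BUFS 64	/* matches the new MAX_RX_REFILL */

/* Fill "ring" with NB_BUFS freshly initialized mbufs taken from "mp". */
static int fill_ring(struct rte_mempool *mp, struct rte_mbuf **ring,
		     uint8_t port_id)
{
	struct rte_mbuf *bufs[NB_BUFS];
	unsigned int i;

	/* One call grabs all buffers; on shortage nothing is taken. */
	if (rte_mempool_get_bulk(mp, (void **)bufs, NB_BUFS) != 0)
		return -1;

	for (i = 0; i < NB_BUFS; i++) {
		struct rte_mbuf *m = bufs[i];

		/* Raw pool objects need their mbuf header re-initialized. */
		rte_mbuf_refcnt_set(m, 1);
		m->data_off = RTE_PKTMBUF_HEADROOM;
		m->next = NULL;
		m->nb_segs = 1;
		m->port = port_id;
		ring[i] = m;
	}
	return 0;
}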

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: Kumar Sanghvi <kumaras@chelsio.com>
---
 drivers/net/cxgbe/base/adapter.h        |   2 +
 drivers/net/cxgbe/base/t4_regs_values.h |   1 +
 drivers/net/cxgbe/sge.c                 | 239 ++++++++++++--------------------
 3 files changed, 89 insertions(+), 153 deletions(-)
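
A note on the new R_IDXDIFF() macro used below to decide when to ring the
GTS doorbell: it measures how far the consumer index has advanced past the
last GTS update on a wrapping ring.  A standalone sketch of the arithmetic
(the idxdiff() helper is illustrative):

#include <assert.h>

/* Distance from tail to head on a ring of "size" slots, handling wrap. */
static unsigned int idxdiff(unsigned int head, unsigned int tail,
			    unsigned int size)
{
	return head >= tail ? head - tail : size - tail + head;
}

int main(void)
{
	assert(idxdiff(70, 6, 1024) == 64);	/* no wrap-around */
	assert(idxdiff(30, 990, 1024) == 64);	/* wrapped past the end */
	return 0;
}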

diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 0ea1c95..a1e8ef7 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -149,6 +149,7 @@ struct sge_rspq {                   /* state for an SGE response queue */
 	unsigned int bar2_qid;      /* Queue ID for BAR2 Queue registers */
 
 	unsigned int cidx;          /* consumer index */
+	unsigned int gts_idx;	    /* last gts write sent */
 	unsigned int iqe_len;       /* entry size */
 	unsigned int size;          /* capacity of response queue */
 	int offset;                 /* offset into current Rx buffer */
@@ -157,6 +158,7 @@ struct sge_rspq {                   /* state for an SGE response queue */
 	u8 intr_params;             /* interrupt holdoff parameters */
 	u8 next_intr_params;        /* holdoff params for next interrupt */
 	u8 pktcnt_idx;              /* interrupt packet threshold */
+	u8 port_id;		    /* associated port-id */
 	u8 idx;                     /* queue index within its group */
 	u16 cntxt_id;               /* SGE relative QID for the response Q */
 	u16 abs_id;                 /* absolute SGE id for the response q */
diff --git a/drivers/net/cxgbe/base/t4_regs_values.h b/drivers/net/cxgbe/base/t4_regs_values.h
index 181bd9d..d7d3144 100644
--- a/drivers/net/cxgbe/base/t4_regs_values.h
+++ b/drivers/net/cxgbe/base/t4_regs_values.h
@@ -68,6 +68,7 @@
  * Egress Context field values
  */
 #define X_FETCHBURSTMIN_64B		2
+#define X_FETCHBURSTMIN_128B		3
 #define X_FETCHBURSTMAX_256B		2
 #define X_FETCHBURSTMAX_512B		3
 
diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c
index 359296e..b737183 100644
--- a/drivers/net/cxgbe/sge.c
+++ b/drivers/net/cxgbe/sge.c
@@ -74,7 +74,7 @@ static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap,
 /*
  * Max number of Rx buffers we replenish at a time.
  */
-#define MAX_RX_REFILL 16U
+#define MAX_RX_REFILL 64U
 
 #define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
 
@@ -238,39 +238,6 @@ static inline bool fl_starving(const struct adapter *adapter,
 	return fl->avail - fl->pend_cred <= s->fl_starve_thres;
 }
 
-static inline unsigned int get_buf_size(struct adapter *adapter,
-					const struct rx_sw_desc *d)
-{
-	struct sge *s = &adapter->sge;
-	unsigned int rx_buf_size_idx = d->dma_addr & RX_BUF_SIZE;
-	unsigned int buf_size;
-
-	switch (rx_buf_size_idx) {
-	case RX_SMALL_PG_BUF:
-		buf_size = PAGE_SIZE;
-		break;
-
-	case RX_LARGE_PG_BUF:
-		buf_size = PAGE_SIZE << s->fl_pg_order;
-		break;
-
-	case RX_SMALL_MTU_BUF:
-		buf_size = FL_MTU_SMALL_BUFSIZE(adapter);
-		break;
-
-	case RX_LARGE_MTU_BUF:
-		buf_size = FL_MTU_LARGE_BUFSIZE(adapter);
-		break;
-
-	default:
-		BUG_ON(1);
-		buf_size = 0; /* deal with bogus compiler warnings */
-		/* NOTREACHED */
-	}
-
-	return buf_size;
-}
-
 /**
  * free_rx_bufs - free the Rx buffers on an SGE free list
  * @q: the SGE free list to free buffers from
@@ -319,7 +286,8 @@ static void unmap_rx_buf(struct sge_fl *q)
 
 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 {
-	if (q->pend_cred >= 8) {
+	/* ring the doorbell once pend_cred reaches q->size / 4 */
+	if (q->pend_cred >= (q->size / 4)) {
 		u32 val = adap->params.arch.sge_fl_db;
 
 		if (is_t4(adap->params.chip))
@@ -356,15 +324,6 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 	}
 }
 
-static inline struct rte_mbuf *cxgbe_rxmbuf_alloc(struct rte_mempool *mp)
-{
-	struct rte_mbuf *m;
-
-	m = __rte_mbuf_raw_alloc(mp);
-	__rte_mbuf_sanity_check_raw(m, 0);
-	return m;
-}
-
 static inline void set_rx_sw_desc(struct rx_sw_desc *sd, void *buf,
 				  dma_addr_t mapping)
 {
@@ -393,9 +352,20 @@ static unsigned int refill_fl_usembufs(struct adapter *adap, struct sge_fl *q,
 	__be64 *d = &q->desc[q->pidx];
 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 	unsigned int buf_size_idx = RX_SMALL_MTU_BUF;
+	struct rte_mbuf *buf_bulk[n];
+	int ret, i;
 
-	while (n--) {
-		struct rte_mbuf *mbuf = cxgbe_rxmbuf_alloc(rxq->rspq.mb_pool);
+	ret = rte_mempool_get_bulk(rxq->rspq.mb_pool, (void *)buf_bulk, n);
+	if (unlikely(ret != 0)) {
+		dev_debug(adap, "%s: failed to allocate fl entries in bulk\n",
+			  __func__);
+		q->alloc_failed++;
+		rxq->rspq.eth_dev->data->rx_mbuf_alloc_failed++;
+		goto out;
+	}
+
+	for (i = 0; i < n; i++) {
+		struct rte_mbuf *mbuf = buf_bulk[i];
 		dma_addr_t mapping;
 
 		if (!mbuf) {
@@ -405,11 +375,13 @@ static unsigned int refill_fl_usembufs(struct adapter *adap, struct sge_fl *q,
 			goto out;
 		}
 
+		rte_mbuf_refcnt_set(mbuf, 1);
 		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
 		mbuf->next = NULL;
+		mbuf->nb_segs = 1;
+		mbuf->port = rxq->rspq.port_id;
 
 		mapping = (dma_addr_t)(mbuf->buf_physaddr + mbuf->data_off);
-
 		mapping |= buf_size_idx;
 		*d++ = cpu_to_be64(mapping);
 		set_rx_sw_desc(sd, mbuf, mapping);
@@ -668,6 +640,7 @@ static void write_sgl(struct rte_mbuf *mbuf, struct sge_txq *q,
 	((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))
 
 #define Q_IDXDIFF(q, idx) IDXDIFF((q)->pidx, (q)->idx, (q)->size)
+#define R_IDXDIFF(q, idx) IDXDIFF((q)->cidx, (q)->idx, (q)->size)
 
 /**
  * ring_tx_db - ring a Tx queue's doorbell
@@ -1354,31 +1327,6 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 }
 
 /**
- * restore_rx_bufs - put back a packet's Rx buffers
- * @q: the SGE free list
- * @frags: number of FL buffers to restore
- *
- * Puts back on an FL the Rx buffers.  The buffers have already been
- * unmapped and are left unmapped, we mark them so to prevent further
- * unmapping attempts.
- *
- * This function undoes a series of @unmap_rx_buf calls when we find out
- * that the current packet can't be processed right away afterall and we
- * need to come back to it later.  This is a very rare event and there's
- * no effort to make this particularly efficient.
- */
-static void restore_rx_bufs(struct sge_fl *q, int frags)
-{
-	while (frags--) {
-		if (q->cidx == 0)
-			q->cidx = q->size - 1;
-		else
-			q->cidx--;
-		q->avail++;
-	}
-}
-
-/**
  * is_new_response - check if a response is newly written
  * @r: the response descriptor
  * @q: the response queue
@@ -1431,7 +1379,6 @@ static int process_responses(struct sge_rspq *q, int budget,
 	int budget_left = budget;
 	const struct rsp_ctrl *rc;
 	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
-	struct adapter *adapter = q->adapter;
 
 	while (likely(budget_left)) {
 		rc = (const struct rsp_ctrl *)
@@ -1447,63 +1394,46 @@ static int process_responses(struct sge_rspq *q, int budget,
 		rsp_type = G_RSPD_TYPE(rc->u.type_gen);
 
 		if (likely(rsp_type == X_RSPD_TYPE_FLBUF)) {
-			struct pkt_gl si;
-			const struct rx_sw_desc *rsd;
-			struct rte_mbuf *pkt = NULL;
-			u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags;
+			const struct rx_sw_desc *rsd =
+						&rxq->fl.sdesc[rxq->fl.cidx];
+			const struct rss_header *rss_hdr =
+						(const void *)q->cur_desc;
+			const struct cpl_rx_pkt *cpl =
+						(const void *)&q->cur_desc[1];
+			bool csum_ok = cpl->csum_calc && !cpl->err_vec;
+			struct rte_mbuf *pkt;
+			u32 len = ntohl(rc->pldbuflen_qid);
 
-			si.usembufs = rxq->usembufs;
-			/*
-			 * In "use mbufs" mode, we don't pack multiple
-			 * ingress packets per buffer (mbuf) so we
-			 * should _always_ get a "New Buffer" flags
-			 * from the SGE.  Also, since we hand the
-			 * mbuf's up to the host stack for it to
-			 * eventually free, we don't release the mbuf's
-			 * in the driver (in contrast to the "packed
-			 * page" mode where the driver needs to
-			 * release its reference on the page buffers).
-			 */
 			BUG_ON(!(len & F_RSPD_NEWBUF));
-			len = G_RSPD_LEN(len);
-			si.tot_len = len;
-
-			/* gather packet fragments */
-			for (frags = 0; len; frags++) {
-				rsd = &rxq->fl.sdesc[rxq->fl.cidx];
-				bufsz = min(get_buf_size(adapter, rsd),	len);
-				pkt = rsd->buf;
-				pkt->data_len = bufsz;
-				pkt->pkt_len = bufsz;
-				si.mbufs[frags] = pkt;
-				len -= bufsz;
-				unmap_rx_buf(&rxq->fl);
+			pkt = rsd->buf;
+			pkt->data_len = G_RSPD_LEN(len);
+			pkt->pkt_len = pkt->data_len;
+			unmap_rx_buf(&rxq->fl);
+
+			if (cpl->l2info & htonl(F_RXF_IP)) {
+				pkt->ol_flags |= PKT_RX_IPV4_HDR;
+				if (unlikely(!csum_ok))
+					pkt->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+
+				if ((cpl->l2info &
+				     htonl(F_RXF_UDP | F_RXF_TCP)) && !csum_ok)
+					pkt->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+			} else if (cpl->l2info & htonl(F_RXF_IP6)) {
+				pkt->ol_flags |= PKT_RX_IPV6_HDR;
 			}
 
-			si.va = RTE_PTR_ADD(si.mbufs[0]->buf_addr,
-					    si.mbufs[0]->data_off);
-			rte_prefetch1(si.va);
-
-			/*
-			 * For the "use mbuf" case here, we can end up
-			 * chewing through our Free List very rapidly
-			 * with one entry per Ingress packet getting
-			 * consumed.  So if the handler() successfully
-			 * consumed the mbuf, check to see if we can
-			 * refill the Free List incrementally in the
-			 * loop ...
-			 */
-			si.nfrags = frags;
-			ret = q->handler(q, q->cur_desc, &si);
-
-			if (unlikely(ret != 0)) {
-				restore_rx_bufs(&rxq->fl, frags);
-			} else {
-				rx_pkts[budget - budget_left] = pkt;
-				if (fl_cap(&rxq->fl) - rxq->fl.avail >= 8)
-					__refill_fl(q->adapter, &rxq->fl);
+			if (!rss_hdr->filter_tid && rss_hdr->hash_type) {
+				pkt->ol_flags |= PKT_RX_RSS_HASH;
+				pkt->hash.rss = ntohl(rss_hdr->hash_val);
 			}
 
+			if (cpl->vlan_ex) {
+				pkt->ol_flags |= PKT_RX_VLAN_PKT;
+				pkt->vlan_tci = ntohs(cpl->vlan);
+			}
+			rxq->stats.pkts++;
+			rxq->stats.rx_bytes += pkt->pkt_len;
+			rx_pkts[budget - budget_left] = pkt;
 		} else if (likely(rsp_type == X_RSPD_TYPE_CPL)) {
 			ret = q->handler(q, q->cur_desc, NULL);
 		} else {
@@ -1518,6 +1448,34 @@ static int process_responses(struct sge_rspq *q, int budget,
 
 		rspq_next(q);
 		budget_left--;
+
+		if (R_IDXDIFF(q, gts_idx) >= 64) {
+			unsigned int cidx_inc = R_IDXDIFF(q, gts_idx);
+			unsigned int params;
+			u32 val;
+
+			__refill_fl(q->adapter, &rxq->fl);
+			params = V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX);
+			q->next_intr_params = params;
+			val = V_CIDXINC(cidx_inc) | V_SEINTARM(params);
+
+			if (unlikely(!q->bar2_addr))
+				t4_write_reg(q->adapter, MYPF_REG(A_SGE_PF_GTS),
+					     val |
+					     V_INGRESSQID((u32)q->cntxt_id));
+			else {
+				writel(val | V_INGRESSQID(q->bar2_qid),
+				       (void *)((uintptr_t)q->bar2_addr +
+				       SGE_UDB_GTS));
+				/*
+				 * This Write memory Barrier will force the
+				 * write to the User Doorbell area to be
+				 * flushed.
+				 */
+				wmb();
+			}
+			q->gts_idx = q->cidx;
+		}
 	}
 
 	/*
@@ -1526,7 +1484,7 @@ static int process_responses(struct sge_rspq *q, int budget,
 	 * refill the Free List.
 	 */
 
-	if (q->offset >= 0 && fl_cap(&rxq->fl) - rxq->fl.avail >= 8)
+	if (q->offset >= 0 && fl_cap(&rxq->fl) - rxq->fl.avail >= 64)
 		__refill_fl(q->adapter, &rxq->fl);
 
 	return budget - budget_left;
@@ -1535,36 +1493,9 @@ static int process_responses(struct sge_rspq *q, int budget,
 int cxgbe_poll(struct sge_rspq *q, struct rte_mbuf **rx_pkts,
 	       unsigned int budget, unsigned int *work_done)
 {
-	unsigned int params;
-	u32 val;
 	int err = 0;
 
 	*work_done = process_responses(q, budget, rx_pkts);
-	params = V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX);
-	q->next_intr_params = params;
-	val = V_CIDXINC(*work_done) | V_SEINTARM(params);
-
-	if (*work_done) {
-		/*
-		 * If we don't have access to the new User GTS (T5+),
-		 * use the old doorbell mechanism; otherwise use the new
-		 * BAR2 mechanism.
-		 */
-		if (unlikely(!q->bar2_addr))
-			t4_write_reg(q->adapter, MYPF_REG(A_SGE_PF_GTS),
-				     val | V_INGRESSQID((u32)q->cntxt_id));
-		else {
-			writel(val | V_INGRESSQID(q->bar2_qid),
-			       (void *)((uintptr_t)q->bar2_addr +
-			       SGE_UDB_GTS));
-			/*
-			 * This Write memory Barrier will force the write to
-			 * the User Doorbell area to be flushed.
-			 */
-			wmb();
-		}
-	}
-
 	return err;
 }
 
@@ -1717,7 +1648,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 		 * Hence maximum allowed burst size will be 448 bytes.
 		 */
 		c.fl0dcaen_to_fl0cidxfthresh =
-			htons(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
+			htons(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_128B) |
 			      V_FW_IQ_CMD_FL0FBMAX((chip <= CHELSIO_T5) ?
 			      X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B));
 		c.fl0size = htons(flsz);
@@ -1730,6 +1661,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 
 	iq->cur_desc = iq->desc;
 	iq->cidx = 0;
+	iq->gts_idx = 0;
 	iq->gen = 1;
 	iq->next_intr_params = iq->intr_params;
 	iq->cntxt_id = ntohs(c.iqid);
@@ -1739,6 +1671,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 	iq->size--;                           /* subtract status entry */
 	iq->eth_dev = eth_dev;
 	iq->handler = hnd;
+	iq->port_id = pi->port_id;
 	iq->mb_pool = mp;
 
 	/* set offset to -1 to distinguish ingress queues without FL */
-- 
2.4.1

Thread overview: 7+ messages
2015-07-07 17:12 [dpdk-dev] [PATCH 0/3] cxgbe: RX perf fixes, doc update, and add support for more devices Rahul Lakkireddy
2015-07-07 17:12 ` Rahul Lakkireddy [this message]
2015-07-07 21:30   ` [dpdk-dev] [PATCH 1/3] cxgbe: Fix RX performance for cxgbe PMD Thomas Monjalon
2015-07-09 14:54     ` Rahul Lakkireddy
2015-07-07 17:12 ` [dpdk-dev] [PATCH 2/3] cxgbe: Add more supported Chelsio T5 devices Rahul Lakkireddy
2015-07-07 17:12 ` [dpdk-dev] [PATCH 3/3] doc: Update cxgbe documentation Rahul Lakkireddy
2015-07-10  1:02 ` [dpdk-dev] [PATCH 0/3] cxgbe: RX perf fixes, doc update, and add support for more devices Thomas Monjalon
