From: Adrien Mazarguil <adrien.mazarguil@6wind.com>
To: Ferruh Yigit <ferruh.yigit@intel.com>
Cc: dev@dpdk.org, Matan Azrad <matan@mellanox.com>,
	Ophir Munk <ophirmu@mellanox.com>,
	Moti Haimovsky <motih@mellanox.com>,
	Vasily Philipov <vasilyf@mellanox.com>
Subject: [dpdk-dev] [PATCH v6 2/5] net/mlx4: add Rx bypassing Verbs
Date: Thu, 12 Oct 2017 14:29:57 +0200
Message-ID: <5900a2c9893c879debccf5c14cbceda952332858.1507810956.git.adrien.mazarguil@6wind.com>
In-Reply-To: <cover.1507810956.git.adrien.mazarguil@6wind.com>

From: Moti Haimovsky <motih@mellanox.com>

This patch adds support for accessing the hardware directly when
handling Rx packets, eliminating the need to use Verbs in the Rx data
path.

Rx scatter support: the number of scatter-gather elements (SGEs) per
packet is calculated on the fly according to the maximum expected
packet size.

Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
Signed-off-by: Moti Haimovsky <motih@mellanox.com>
Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
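(Reviewer note, not part of the commit: the scatter sizing performed in
mlx4_rx_queue_setup() can be illustrated standalone. The sketch below
uses hypothetical values for the headroom, mbuf data room and maximum
packet size, and a local log2_ceil() helper approximating what
rte_log2_u32() does for non-zero inputs.)

    /*
     * Standalone sketch of the SGE calculation (hypothetical values).
     * "size" covers the headroom plus the largest expected packet;
     * the segment count is the ceiling of size / mb_len, rounded up
     * to a power of two so the WQ stride stays aligned.
     */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t
    log2_ceil(uint32_t v) /* stand-in for rte_log2_u32() */
    {
            uint32_t n = 0;

            while ((1u << n) < v)
                    ++n;
            return n;
    }

    int
    main(void)
    {
            const uint32_t headroom = 128;  /* RTE_PKTMBUF_HEADROOM */
            const uint32_t mb_len = 2176;   /* mbuf data room size */
            const uint32_t max_pkt = 9000;  /* jumbo frame */
            uint32_t size = headroom + max_pkt;
            uint32_t sges_n =
                    log2_ceil((size / mb_len) + !!(size % mb_len));
            /* Same overflow check as the patch: recompute usable size. */
            uint32_t usable = mb_len * (1u << sges_n) - headroom;

            printf("sges_n=%u -> %u segments, usable=%u bytes\n",
                   sges_n, 1u << sges_n, usable);
            return usable >= max_pkt ? 0 : 1;
    }

With these inputs, 9128 bytes spread over 2176-byte segments needs 5
mbufs, rounded up to 8 (sges_n = 3), leaving 17280 usable bytes.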
 doc/guides/nics/features/mlx4.ini |   1 +
 drivers/net/mlx4/mlx4_rxq.c       | 151 +++++++++++++++-------
 drivers/net/mlx4/mlx4_rxtx.c      | 226 +++++++++++++++++++--------------
 drivers/net/mlx4/mlx4_rxtx.h      |  19 ++-
 4 files changed, 242 insertions(+), 155 deletions(-)
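(Also not part of the commit: the validity test in the new
mlx4_cq_poll_one() hinges on the CQE owner bit alternating with each
lap around the CQ. A minimal sketch of that test follows, with the
surrounding types simplified and the mask value assumed for
illustration only.)

    /*
     * Sketch: a CQE is valid for software when the parity of its
     * owner bit matches the parity of the consumer index lap.
     * Because cqe_cnt is a power of two, (cons_index & cqe_cnt)
     * flips every time the ring wraps, so hardware and software
     * alternate ownership of each slot.
     */
    #include <stdint.h>

    #define OWNER_MASK 0x80 /* assumed top bit of owner_sr_opcode */

    struct cqe {
            uint8_t owner_sr_opcode;
    };

    static int
    cqe_valid(const struct cqe *cqe, uint32_t cons_index,
              uint32_t cqe_cnt /* power of two */)
    {
            int hw = !!(cqe->owner_sr_opcode & OWNER_MASK);
            int sw = !!(cons_index & cqe_cnt);

            /* Same test as mlx4_cq_poll_one(): XOR set = not ours. */
            return !(hw ^ sw);
    }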

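(A last reviewer note on ordering: once Verbs no longer posts receive
buffers, the PMD has to ring the RQ doorbell itself. The write barrier
is what keeps the device from observing a doorbell count ahead of the
WQE contents it covers. This is a conceptual sketch of the sequence
used at the end of mlx4_rx_burst() and mlx4_rx_queue_setup(), not new
code.)

    #include <stdint.h>
    #include <rte_atomic.h>     /* rte_wmb() */
    #include <rte_byteorder.h>  /* rte_cpu_to_be_32() */

    /* Publish rq_ci posted WQEs to the NIC via the doorbell record. */
    static void
    post_rx_doorbell(volatile uint32_t *rq_db, uint32_t rq_ci)
    {
            rte_wmb();                        /* WQE writes first... */
            *rq_db = rte_cpu_to_be_32(rq_ci); /* ...doorbell second. */
    }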
diff --git a/doc/guides/nics/features/mlx4.ini b/doc/guides/nics/features/mlx4.ini
index 9750ebf..19ae688 100644
--- a/doc/guides/nics/features/mlx4.ini
+++ b/doc/guides/nics/features/mlx4.ini
@@ -12,6 +12,7 @@ Rx interrupt         = Y
 Queue start/stop     = Y
 MTU update           = Y
 Jumbo frame          = Y
+Scattered Rx         = Y
 Promiscuous mode     = Y
 Allmulticast mode    = Y
 Unicast MAC filter   = Y
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 483fe9b..39c83bc 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -51,6 +51,7 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include <rte_byteorder.h>
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_ethdev.h>
@@ -312,45 +313,46 @@ void mlx4_rss_detach(struct mlx4_rss *rss)
 static int
 mlx4_rxq_alloc_elts(struct rxq *rxq)
 {
-	struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts;
+	const uint32_t elts_n = 1 << rxq->elts_n;
+	const uint32_t sges_n = 1 << rxq->sges_n;
+	struct rte_mbuf *(*elts)[elts_n] = rxq->elts;
 	unsigned int i;
 
-	/* For each WR (packet). */
+	assert(rte_is_power_of_2(elts_n));
 	for (i = 0; i != RTE_DIM(*elts); ++i) {
-		struct rxq_elt *elt = &(*elts)[i];
-		struct ibv_recv_wr *wr = &elt->wr;
-		struct ibv_sge *sge = &(*elts)[i].sge;
+		volatile struct mlx4_wqe_data_seg *scat = &(*rxq->wqes)[i];
 		struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
 
 		if (buf == NULL) {
 			while (i--) {
-				rte_pktmbuf_free_seg((*elts)[i].buf);
-				(*elts)[i].buf = NULL;
+				rte_pktmbuf_free_seg((*elts)[i]);
+				(*elts)[i] = NULL;
 			}
 			rte_errno = ENOMEM;
 			return -rte_errno;
 		}
-		elt->buf = buf;
-		wr->next = &(*elts)[(i + 1)].wr;
-		wr->sg_list = sge;
-		wr->num_sge = 1;
 		/* Headroom is reserved by rte_pktmbuf_alloc(). */
 		assert(buf->data_off == RTE_PKTMBUF_HEADROOM);
 		/* Buffer is supposed to be empty. */
 		assert(rte_pktmbuf_data_len(buf) == 0);
 		assert(rte_pktmbuf_pkt_len(buf) == 0);
-		/* sge->addr must be able to store a pointer. */
-		assert(sizeof(sge->addr) >= sizeof(uintptr_t));
-		/* SGE keeps its headroom. */
-		sge->addr = (uintptr_t)
-			((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
-		sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
-		sge->lkey = rxq->mr->lkey;
-		/* Redundant check for tailroom. */
-		assert(sge->length == rte_pktmbuf_tailroom(buf));
+		/* Only the first segment keeps headroom. */
+		if (i % sges_n)
+			buf->data_off = 0;
+		buf->port = rxq->port_id;
+		buf->data_len = rte_pktmbuf_tailroom(buf);
+		buf->pkt_len = rte_pktmbuf_tailroom(buf);
+		buf->nb_segs = 1;
+		*scat = (struct mlx4_wqe_data_seg){
+			.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+								  uintptr_t)),
+			.byte_count = rte_cpu_to_be_32(buf->data_len),
+			.lkey = rte_cpu_to_be_32(rxq->mr->lkey),
+		};
+		(*elts)[i] = buf;
 	}
-	/* The last WR pointer must be NULL. */
-	(*elts)[(i - 1)].wr.next = NULL;
+	DEBUG("%p: allocated and configured %u segments (max %u packets)",
+	      (void *)rxq, elts_n, elts_n / sges_n);
 	return 0;
 }
 
@@ -364,14 +366,14 @@ static void
 mlx4_rxq_free_elts(struct rxq *rxq)
 {
 	unsigned int i;
-	struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts;
+	struct rte_mbuf *(*elts)[1 << rxq->elts_n] = rxq->elts;
 
-	DEBUG("%p: freeing WRs", (void *)rxq);
+	DEBUG("%p: freeing Rx queue elements", (void *)rxq);
 	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
-		if (!(*elts)[i].buf)
+		if (!(*elts)[i])
 			continue;
-		rte_pktmbuf_free_seg((*elts)[i].buf);
-		(*elts)[i].buf = NULL;
+		rte_pktmbuf_free_seg((*elts)[i]);
+		(*elts)[i] = NULL;
 	}
 }
 
@@ -400,8 +402,11 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		    struct rte_mempool *mp)
 {
 	struct priv *priv = dev->data->dev_private;
+	struct mlx4dv_obj mlxdv;
+	struct mlx4dv_rwq dv_rwq;
+	struct mlx4dv_cq dv_cq;
 	uint32_t mb_len = rte_pktmbuf_data_room_size(mp);
-	struct rxq_elt (*elts)[desc];
+	struct rte_mbuf *(*elts)[rte_align32pow2(desc)];
 	struct rte_flow_error error;
 	struct rxq *rxq;
 	struct mlx4_malloc_vec vec[] = {
@@ -439,6 +444,12 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		ERROR("%p: invalid number of Rx descriptors", (void *)dev);
 		return -rte_errno;
 	}
+	if (desc != RTE_DIM(*elts)) {
+		desc = RTE_DIM(*elts);
+		WARN("%p: increased number of descriptors in Rx queue %u"
+		     " to the next power of two (%u)",
+		     (void *)dev, idx, desc);
+	}
 	/* Allocate and initialize Rx queue. */
 	mlx4_zmallocv_socket("RXQ", vec, RTE_DIM(vec), socket);
 	if (!rxq) {
@@ -450,8 +461,8 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		.priv = priv,
 		.mp = mp,
 		.port_id = dev->data->port_id,
-		.elts_n = desc,
-		.elts_head = 0,
+		.sges_n = 0,
+		.elts_n = rte_log2_u32(desc),
 		.elts = elts,
 		.stats.idx = idx,
 		.socket = socket,
@@ -462,9 +473,29 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
 		;
 	} else if (dev->data->dev_conf.rxmode.enable_scatter) {
-		WARN("%p: scattered mode has been requested but is"
-		     " not supported, this may lead to packet loss",
-		     (void *)dev);
+		uint32_t size =
+			RTE_PKTMBUF_HEADROOM +
+			dev->data->dev_conf.rxmode.max_rx_pkt_len;
+		uint32_t sges_n;
+
+		/*
+		 * Determine the number of SGEs needed for a full packet
+		 * and round it to the next power of two.
+		 */
+		sges_n = rte_log2_u32((size / mb_len) + !!(size % mb_len));
+		rxq->sges_n = sges_n;
+		/* Make sure sges_n did not overflow. */
+		size = mb_len * (1 << rxq->sges_n);
+		size -= RTE_PKTMBUF_HEADROOM;
+		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+			rte_errno = EOVERFLOW;
+			ERROR("%p: too many SGEs (%u) needed to handle"
+			      " requested maximum packet size %u",
+			      (void *)dev,
+			      1 << sges_n,
+			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
+			goto error;
+		}
 	} else {
 		WARN("%p: the requested maximum Rx packet size (%u) is"
 		     " larger than a single mbuf (%u) and scattered"
@@ -473,6 +504,17 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
 		     mb_len - RTE_PKTMBUF_HEADROOM);
 	}
+	DEBUG("%p: maximum number of segments per packet: %u",
+	      (void *)dev, 1 << rxq->sges_n);
+	if (desc % (1 << rxq->sges_n)) {
+		rte_errno = EINVAL;
+		ERROR("%p: number of Rx queue descriptors (%u) is not a"
+		      " multiple of maximum segments per packet (%u)",
+		      (void *)dev,
+		      desc,
+		      1 << rxq->sges_n);
+		goto error;
+	}
 	/* Use the entire Rx mempool as the memory region. */
 	rxq->mr = mlx4_mp2mr(priv->pd, mp);
 	if (!rxq->mr) {
@@ -497,7 +539,8 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			goto error;
 		}
 	}
-	rxq->cq = ibv_create_cq(priv->ctx, desc, NULL, rxq->channel, 0);
+	rxq->cq = ibv_create_cq(priv->ctx, desc >> rxq->sges_n, NULL,
+				rxq->channel, 0);
 	if (!rxq->cq) {
 		rte_errno = ENOMEM;
 		ERROR("%p: CQ creation failure: %s",
@@ -508,8 +551,8 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		(priv->ctx,
 		 &(struct ibv_wq_init_attr){
 			.wq_type = IBV_WQT_RQ,
-			.max_wr = RTE_MIN(priv->device_attr.max_qp_wr, desc),
-			.max_sge = 1,
+			.max_wr = desc >> rxq->sges_n,
+			.max_sge = 1 << rxq->sges_n,
 			.pd = priv->pd,
 			.cq = rxq->cq,
 		 });
@@ -531,27 +574,43 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		      (void *)dev, strerror(rte_errno));
 		goto error;
 	}
-	ret = mlx4_rxq_alloc_elts(rxq);
+	/* Retrieve device queue information. */
+	mlxdv.cq.in = rxq->cq;
+	mlxdv.cq.out = &dv_cq;
+	mlxdv.rwq.in = rxq->wq;
+	mlxdv.rwq.out = &dv_rwq;
+	ret = mlx4dv_init_obj(&mlxdv, MLX4DV_OBJ_RWQ | MLX4DV_OBJ_CQ);
 	if (ret) {
-		ERROR("%p: RXQ allocation failed: %s",
-		      (void *)dev, strerror(rte_errno));
+		rte_errno = EINVAL;
+		ERROR("%p: failed to obtain device information", (void *)dev);
 		goto error;
 	}
-	ret = ibv_post_wq_recv(rxq->wq, &(*rxq->elts)[0].wr,
-			       &(struct ibv_recv_wr *){ NULL });
+	rxq->wqes =
+		(volatile struct mlx4_wqe_data_seg (*)[])
+		((uintptr_t)dv_rwq.buf.buf + dv_rwq.rq.offset);
+	rxq->rq_db = dv_rwq.rdb;
+	rxq->rq_ci = 0;
+	rxq->mcq.buf = dv_cq.buf.buf;
+	rxq->mcq.cqe_cnt = dv_cq.cqe_cnt;
+	rxq->mcq.set_ci_db = dv_cq.set_ci_db;
+	rxq->mcq.cqe_64 = (dv_cq.cqe_size & 64) ? 1 : 0;
+	ret = mlx4_rxq_alloc_elts(rxq);
 	if (ret) {
-		rte_errno = ret;
-		ERROR("%p: ibv_post_recv() failed: %s",
-		      (void *)dev,
-		      strerror(rte_errno));
+		ERROR("%p: RXQ allocation failed: %s",
+		      (void *)dev, strerror(rte_errno));
 		goto error;
 	}
 	DEBUG("%p: adding Rx queue %p to list", (void *)dev, (void *)rxq);
 	dev->data->rx_queues[idx] = rxq;
 	/* Enable associated flows. */
 	ret = mlx4_flow_sync(priv, &error);
-	if (!ret)
+	if (!ret) {
+		/* Update doorbell counter. */
+		rxq->rq_ci = desc >> rxq->sges_n;
+		rte_wmb();
+		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 		return 0;
+	}
 	ERROR("cannot re-attach flow rules to queue %u"
 	      " (code %d, \"%s\"), flow error type %d, cause %p, message: %s",
 	      idx, -ret, strerror(-ret), error.type, error.cause,
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 38b87a0..cc0baaa 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -538,9 +538,44 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 }
 
 /**
- * DPDK callback for Rx.
+ * Poll one CQE from CQ.
  *
- * The following function doesn't manage scattered packets.
+ * @param rxq
+ *   Pointer to the receive queue structure.
+ * @param[out] out
+ *   Just polled CQE.
+ *
+ * @return
+ *   Number of bytes of the CQE, 0 in case there is no completion.
+ */
+static unsigned int
+mlx4_cq_poll_one(struct rxq *rxq, struct mlx4_cqe **out)
+{
+	int ret = 0;
+	struct mlx4_cqe *cqe = NULL;
+	struct mlx4_cq *cq = &rxq->mcq;
+
+	cqe = (struct mlx4_cqe *)mlx4_get_cqe(cq, cq->cons_index);
+	if (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+	    !!(cq->cons_index & cq->cqe_cnt))
+		goto out;
+	/*
+	 * Make sure we read CQ entry contents after we've checked the
+	 * ownership bit.
+	 */
+	rte_rmb();
+	assert(!(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK));
+	assert((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) !=
+	       MLX4_CQE_OPCODE_ERROR);
+	ret = rte_be_to_cpu_32(cqe->byte_cnt);
+	++cq->cons_index;
+out:
+	*out = cqe;
+	return ret;
+}
+
+/**
+ * DPDK callback for Rx with scattered packets support.
  *
  * @param dpdk_rxq
  *   Generic pointer to Rx queue structure.
@@ -555,112 +590,107 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 uint16_t
 mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 {
-	struct rxq *rxq = (struct rxq *)dpdk_rxq;
-	struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts;
-	const unsigned int elts_n = rxq->elts_n;
-	unsigned int elts_head = rxq->elts_head;
-	struct ibv_wc wcs[pkts_n];
-	struct ibv_recv_wr *wr_head = NULL;
-	struct ibv_recv_wr **wr_next = &wr_head;
-	struct ibv_recv_wr *wr_bad = NULL;
-	unsigned int i;
-	unsigned int pkts_ret = 0;
-	int ret;
+	struct rxq *rxq = dpdk_rxq;
+	const uint32_t wr_cnt = (1 << rxq->elts_n) - 1;
+	const uint16_t sges_n = rxq->sges_n;
+	struct rte_mbuf *pkt = NULL;
+	struct rte_mbuf *seg = NULL;
+	unsigned int i = 0;
+	uint32_t rq_ci = rxq->rq_ci << sges_n;
+	int len = 0;
 
-	ret = ibv_poll_cq(rxq->cq, pkts_n, wcs);
-	if (unlikely(ret == 0))
-		return 0;
-	if (unlikely(ret < 0)) {
-		DEBUG("rxq=%p, ibv_poll_cq() failed (wc_n=%d)",
-		      (void *)rxq, ret);
-		return 0;
-	}
-	assert(ret <= (int)pkts_n);
-	/* For each work completion. */
-	for (i = 0; i != (unsigned int)ret; ++i) {
-		struct ibv_wc *wc = &wcs[i];
-		struct rxq_elt *elt = &(*elts)[elts_head];
-		struct ibv_recv_wr *wr = &elt->wr;
-		uint32_t len = wc->byte_len;
-		struct rte_mbuf *seg = elt->buf;
-		struct rte_mbuf *rep;
+	while (pkts_n) {
+		struct mlx4_cqe *cqe;
+		uint32_t idx = rq_ci & wr_cnt;
+		struct rte_mbuf *rep = (*rxq->elts)[idx];
+		volatile struct mlx4_wqe_data_seg *scat = &(*rxq->wqes)[idx];
 
-		/* Sanity checks. */
-		assert(wr->sg_list == &elt->sge);
-		assert(wr->num_sge == 1);
-		assert(elts_head < rxq->elts_n);
-		assert(rxq->elts_head < rxq->elts_n);
-		/*
-		 * Fetch initial bytes of packet descriptor into a
-		 * cacheline while allocating rep.
-		 */
-		rte_mbuf_prefetch_part1(seg);
-		rte_mbuf_prefetch_part2(seg);
-		/* Link completed WRs together for repost. */
-		*wr_next = wr;
-		wr_next = &wr->next;
-		if (unlikely(wc->status != IBV_WC_SUCCESS)) {
-			/* Whatever, just repost the offending WR. */
-			DEBUG("rxq=%p: bad work completion status (%d): %s",
-			      (void *)rxq, wc->status,
-			      ibv_wc_status_str(wc->status));
-			/* Increment dropped packets counter. */
-			++rxq->stats.idropped;
-			goto repost;
-		}
+		/* Update the 'next' pointer of the previous segment. */
+		if (pkt)
+			seg->next = rep;
+		seg = rep;
+		rte_prefetch0(seg);
+		rte_prefetch0(scat);
 		rep = rte_mbuf_raw_alloc(rxq->mp);
 		if (unlikely(rep == NULL)) {
-			/*
-			 * Unable to allocate a replacement mbuf,
-			 * repost WR.
-			 */
-			DEBUG("rxq=%p: can't allocate a new mbuf",
-			      (void *)rxq);
-			/* Increase out of memory counters. */
 			++rxq->stats.rx_nombuf;
-			++rxq->priv->dev->data->rx_mbuf_alloc_failed;
-			goto repost;
+			if (!pkt) {
+				/*
+				 * No buffers before we even started,
+				 * bail out silently.
+				 */
+				break;
+			}
+			while (pkt != seg) {
+				assert(pkt != (*rxq->elts)[idx]);
+				rep = pkt->next;
+				pkt->next = NULL;
+				pkt->nb_segs = 1;
+				rte_mbuf_raw_free(pkt);
+				pkt = rep;
+			}
+			break;
+		}
+		if (!pkt) {
+			/* Looking for the new packet. */
+			len = mlx4_cq_poll_one(rxq, &cqe);
+			if (!len) {
+				rte_mbuf_raw_free(rep);
+				break;
+			}
+			if (unlikely(len < 0)) {
+				/* Rx error, packet is likely too large. */
+				rte_mbuf_raw_free(rep);
+				++rxq->stats.idropped;
+				goto skip;
+			}
+			pkt = seg;
+			pkt->packet_type = 0;
+			pkt->ol_flags = 0;
+			pkt->pkt_len = len;
+		}
+		rep->nb_segs = 1;
+		rep->port = rxq->port_id;
+		rep->data_len = seg->data_len;
+		rep->data_off = seg->data_off;
+		(*rxq->elts)[idx] = rep;
+		/*
+		 * Fill NIC descriptor with the new buffer. The lkey and size
+		 * of the buffers are already known, only the buffer address
+		 * changes.
+		 */
+		scat->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
+		if (len > seg->data_len) {
+			len -= seg->data_len;
+			++pkt->nb_segs;
+			++rq_ci;
+			continue;
 		}
-		/* Reconfigure sge to use rep instead of seg. */
-		elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
-		assert(elt->sge.lkey == rxq->mr->lkey);
-		elt->buf = rep;
-		/* Update seg information. */
-		seg->data_off = RTE_PKTMBUF_HEADROOM;
-		seg->nb_segs = 1;
-		seg->port = rxq->port_id;
-		seg->next = NULL;
-		seg->pkt_len = len;
+		/* The last segment. */
 		seg->data_len = len;
-		seg->packet_type = 0;
-		seg->ol_flags = 0;
+		/* Increment bytes counter. */
+		rxq->stats.ibytes += pkt->pkt_len;
 		/* Return packet. */
-		*(pkts++) = seg;
-		++pkts_ret;
-		/* Increase bytes counter. */
-		rxq->stats.ibytes += len;
-repost:
-		if (++elts_head >= elts_n)
-			elts_head = 0;
-		continue;
+		*(pkts++) = pkt;
+		pkt = NULL;
+		--pkts_n;
+		++i;
+skip:
+		/* Align consumer index to the next stride. */
+		rq_ci >>= sges_n;
+		++rq_ci;
+		rq_ci <<= sges_n;
 	}
-	if (unlikely(i == 0))
+	if (unlikely(i == 0 && (rq_ci >> sges_n) == rxq->rq_ci))
 		return 0;
-	/* Repost WRs. */
-	*wr_next = NULL;
-	assert(wr_head);
-	ret = ibv_post_wq_recv(rxq->wq, wr_head, &wr_bad);
-	if (unlikely(ret)) {
-		/* Inability to repost WRs is fatal. */
-		DEBUG("%p: recv_burst(): failed (ret=%d)",
-		      (void *)rxq->priv,
-		      ret);
-		abort();
-	}
-	rxq->elts_head = elts_head;
-	/* Increase packets counter. */
-	rxq->stats.ipackets += pkts_ret;
-	return pkts_ret;
+	/* Update the consumer index. */
+	rxq->rq_ci = rq_ci >> sges_n;
+	rte_wmb();
+	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+	*rxq->mcq.set_ci_db = rte_cpu_to_be_32(rxq->mcq.cons_index & 0xffffff);
+	/* Increment packets counter. */
+	rxq->stats.ipackets += i;
+	return i;
 }
 
 /**
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index ff27126..fa5738f 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -63,13 +63,6 @@ struct mlx4_rxq_stats {
 	uint64_t rx_nombuf; /**< Total of Rx mbuf allocation failures. */
 };
 
-/** Rx element. */
-struct rxq_elt {
-	struct ibv_recv_wr wr; /**< Work request. */
-	struct ibv_sge sge; /**< Scatter/gather element. */
-	struct rte_mbuf *buf; /**< Buffer. */
-};
-
 /** Rx queue descriptor. */
 struct rxq {
 	struct priv *priv; /**< Back pointer to private data. */
@@ -78,10 +71,14 @@ struct rxq {
 	struct ibv_cq *cq; /**< Completion queue. */
 	struct ibv_wq *wq; /**< Work queue. */
 	struct ibv_comp_channel *channel; /**< Rx completion channel. */
-	unsigned int port_id; /**< Port ID for incoming packets. */
-	unsigned int elts_n; /**< (*elts)[] length. */
-	unsigned int elts_head; /**< Current index in (*elts)[]. */
-	struct rxq_elt (*elts)[]; /**< Rx elements. */
+	uint16_t rq_ci; /**< Saved RQ consumer index. */
+	uint16_t port_id; /**< Port ID for incoming packets. */
+	uint16_t sges_n; /**< Number of segments per packet (log2 value). */
+	uint16_t elts_n; /**< Mbuf queue size (log2 value). */
+	struct rte_mbuf *(*elts)[]; /**< Rx elements. */
+	volatile struct mlx4_wqe_data_seg (*wqes)[]; /**< HW queue entries. */
+	volatile uint32_t *rq_db; /**< RQ doorbell record. */
+	struct mlx4_cq mcq;  /**< Info for directly manipulating the CQ. */
 	struct mlx4_rxq_stats stats; /**< Rx queue counters. */
 	unsigned int socket; /**< CPU socket ID for allocations. */
 	uint8_t data[]; /**< Remaining queue resources. */
-- 
2.1.4

Thread overview: 61+ messages
2017-08-24 15:54 [dpdk-dev] [PATCH 0/5] new mlx4 Tx datapath bypassing ibverbs Moti Haimovsky
2017-08-24 15:54 ` [dpdk-dev] [PATCH 1/5] net/mlx4: add simple Tx " Moti Haimovsky
2017-08-24 15:54 ` [dpdk-dev] [PATCH 2/5] net/mlx4: support multi-segments Tx Moti Haimovsky
2017-08-24 15:54 ` [dpdk-dev] [PATCH 3/5] net/mlx4: refine setting Tx completion flag Moti Haimovsky
2017-08-24 15:54 ` [dpdk-dev] [PATCH 4/5] net/mlx4: add Tx checksum offloads Moti Haimovsky
2017-08-24 15:54 ` [dpdk-dev] [PATCH 5/5] net/mlx4: add loopback Tx from VF Moti Haimovsky
2017-10-03 10:48 ` [dpdk-dev] [PATCH v2 0/6] new mlx4 datapath bypassing ibverbs Matan Azrad
2017-10-03 10:48   ` [dpdk-dev] [PATCH v2 1/6] net/mlx4: add simple Tx " Matan Azrad
2017-10-03 10:48   ` [dpdk-dev] [PATCH v2 2/6] net/mlx4: get back Rx flow functionality Matan Azrad
2017-10-03 10:48   ` [dpdk-dev] [PATCH v2 3/6] net/mlx4: support multi-segments Tx Matan Azrad
2017-10-03 10:48   ` [dpdk-dev] [PATCH v2 4/6] net/mlx4: get back Tx checksum offloads Matan Azrad
2017-10-03 10:48   ` [dpdk-dev] [PATCH v2 5/6] net/mlx4: get back Rx " Matan Azrad
2017-10-03 22:26     ` Ferruh Yigit
2017-10-03 10:48   ` [dpdk-dev] [PATCH v2 6/6] net/mlx4: add loopback Tx from VF Matan Azrad
2017-10-03 22:27   ` [dpdk-dev] [PATCH v2 0/6] new mlx4 datapath bypassing ibverbs Ferruh Yigit
2017-10-04 18:48   ` [dpdk-dev] [PATCH v3 " Adrien Mazarguil
2017-10-04 18:48     ` [dpdk-dev] [PATCH v3 1/6] net/mlx4: add simple Tx bypassing Verbs Adrien Mazarguil
2017-10-04 18:48     ` [dpdk-dev] [PATCH v3 2/6] net/mlx4: restore full Rx support " Adrien Mazarguil
2017-10-04 18:48     ` [dpdk-dev] [PATCH v3 3/6] net/mlx4: restore Tx gather support Adrien Mazarguil
2017-10-04 18:48     ` [dpdk-dev] [PATCH v3 4/6] net/mlx4: restore Tx checksum offloads Adrien Mazarguil
2017-10-04 18:48     ` [dpdk-dev] [PATCH v3 5/6] net/mlx4: restore Rx offloads Adrien Mazarguil
2017-10-04 18:48     ` [dpdk-dev] [PATCH v3 6/6] net/mlx4: add loopback Tx from VF Adrien Mazarguil
2017-10-05  9:33     ` [dpdk-dev] [PATCH v4 0/7] new mlx4 datapath bypassing ibverbs Ophir Munk
2017-10-05  9:33       ` [dpdk-dev] [PATCH v4 1/7] net/mlx4: add simple Tx bypassing Verbs Ophir Munk
2017-10-05  9:33       ` [dpdk-dev] [PATCH v4 2/7] net/mlx4: restore full Rx support " Ophir Munk
2017-10-05  9:33       ` [dpdk-dev] [PATCH v4 3/7] net/mlx4: restore Rx scatter support Ophir Munk
2017-10-05  9:33       ` [dpdk-dev] [PATCH v4 4/7] net/mlx4: restore Tx gather support Ophir Munk
2017-10-05  9:33       ` [dpdk-dev] [PATCH v4 5/7] net/mlx4: restore Tx checksum offloads Ophir Munk
2017-10-05  9:33       ` [dpdk-dev] [PATCH v4 6/7] net/mlx4: restore Rx offloads Ophir Munk
2017-10-05  9:33       ` [dpdk-dev] [PATCH v4 7/7] net/mlx4: add loopback Tx from VF Ophir Munk
2017-10-05 11:40       ` [dpdk-dev] [PATCH v4 0/7] new mlx4 datapath bypassing ibverbs Adrien Mazarguil
2017-10-05 18:48       ` Ferruh Yigit
2017-10-05 18:54         ` Ferruh Yigit
2017-10-11 18:31       ` [dpdk-dev] [PATCH v5 0/5] " Adrien Mazarguil
2017-10-11 18:31         ` [dpdk-dev] [PATCH v5 1/5] net/mlx4: add Tx bypassing Verbs Adrien Mazarguil
2017-10-11 18:31         ` [dpdk-dev] [PATCH v5 2/5] net/mlx4: add Rx " Adrien Mazarguil
2017-10-11 18:32         ` [dpdk-dev] [PATCH v5 3/5] net/mlx4: restore Tx checksum offloads Adrien Mazarguil
2017-10-11 18:32         ` [dpdk-dev] [PATCH v5 4/5] net/mlx4: restore Rx offloads Adrien Mazarguil
2017-10-11 18:32         ` [dpdk-dev] [PATCH v5 5/5] net/mlx4: add loopback Tx from VF Adrien Mazarguil
2017-10-12 12:29         ` [dpdk-dev] [PATCH v6 0/5] new mlx4 datapath bypassing ibverbs Adrien Mazarguil
2017-10-12 12:29           ` [dpdk-dev] [PATCH v6 1/5] net/mlx4: add Tx bypassing Verbs Adrien Mazarguil
2017-10-12 12:29           ` Adrien Mazarguil [this message]
2017-10-12 12:29           ` [dpdk-dev] [PATCH v6 3/5] net/mlx4: restore Tx checksum offloads Adrien Mazarguil
2017-10-12 12:29           ` [dpdk-dev] [PATCH v6 4/5] net/mlx4: restore Rx offloads Adrien Mazarguil
2017-10-12 12:30           ` [dpdk-dev] [PATCH v6 5/5] net/mlx4: add loopback Tx from VF Adrien Mazarguil
2017-10-24  6:29           ` [dpdk-dev] [PATCH v6 0/5] new mlx4 datapath bypassing ibverbs gowrishankar muthukrishnan
2017-10-24  8:49             ` gowrishankar muthukrishnan
2017-10-24  9:55               ` Nélio Laranjeiro
2017-10-24 10:01                 ` Adrien Mazarguil
2017-10-24 16:59           ` Ferruh Yigit
2017-10-04 21:48   ` [dpdk-dev] [PATCH v3 0/7] " Ophir Munk
2017-10-04 21:49     ` [dpdk-dev] [PATCH v3 1/7] net/mlx4: add simple Tx " Ophir Munk
2017-10-04 21:49     ` [dpdk-dev] [PATCH v3 2/7] net/mlx4: get back Rx flow functionality Ophir Munk
2017-10-04 21:49     ` [dpdk-dev] [PATCH v3 3/7] net/mlx4: support multi-segments Rx Ophir Munk
2017-10-04 21:49     ` [dpdk-dev] [PATCH v3 4/7] net/mlx4: support multi-segments Tx Ophir Munk
2017-10-04 21:49     ` [dpdk-dev] [PATCH v3 5/7] net/mlx4: get back Tx checksum offloads Ophir Munk
2017-10-04 21:49     ` [dpdk-dev] [PATCH v3 6/7] net/mlx4: get back Rx " Ophir Munk
2017-10-04 21:49     ` [dpdk-dev] [PATCH v3 7/7] net/mlx4: add loopback Tx from VF Ophir Munk
2017-10-04 22:37     ` [dpdk-dev] [PATCH v3 0/7] new mlx4 datapath bypassing ibverbs Ferruh Yigit
2017-10-04 22:46       ` Thomas Monjalon
2017-10-24 11:56 ` [dpdk-dev] [PATCH 0/5] new mlx4 Tx " Nélio Laranjeiro
