From: Matan Azrad <matan@mellanox.com>
To: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Cc: dev@dpdk.org, Moti Haimovsky <motih@mellanox.com>,
Vasily Philipov <vasilyf@mellanox.com>
Subject: [dpdk-dev] [PATCH v2 2/6] net/mlx4: get back Rx flow functionality
Date: Tue, 3 Oct 2017 10:48:27 +0000 [thread overview]
Message-ID: <1507027711-879-3-git-send-email-matan@mellanox.com> (raw)
In-Reply-To: <1507027711-879-1-git-send-email-matan@mellanox.com>
From: Moti Haimovsky <motih@mellanox.com>
This patch adds support for accessing the hardware directly when handling
Rx packets eliminating the need to use verbs in the Rx datapath.
Now the number of scatters is calculated on the fly, according to the
maximum expected packet size.
Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
drivers/net/mlx4/mlx4.h | 11 --
drivers/net/mlx4/mlx4_rxq.c | 176 +++++++++++++++++++-------------
drivers/net/mlx4/mlx4_rxtx.c | 229 ++++++++++++++++++++++++------------------
drivers/net/mlx4/mlx4_rxtx.h | 19 ++--
drivers/net/mlx4/mlx4_utils.h | 20 ++++
5 files changed, 268 insertions(+), 187 deletions(-)
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 93e5502..b6e1ef2 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -57,17 +57,6 @@
/* Maximum size for inline data. */
#define MLX4_PMD_MAX_INLINE 0
-/*
- * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP
- * from which buffers are to be transmitted will have to be mapped by this
- * driver to their own Memory Region (MR). This is a slow operation.
- *
- * This value is always 1 for RX queues.
- */
-#ifndef MLX4_PMD_TX_MP_CACHE
-#define MLX4_PMD_TX_MP_CACHE 8
-#endif
-
/* Interrupt alarm timeout value in microseconds. */
#define MLX4_INTR_ALARM_TIMEOUT 100000
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 409983f..d7447c4 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -51,6 +51,7 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
+#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
@@ -77,60 +78,63 @@
mlx4_rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n)
{
unsigned int i;
- struct rxq_elt (*elts)[elts_n] =
- rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
- rxq->socket);
+ const unsigned int sge_n = 1 << rxq->sge_n;
+ struct rte_mbuf *(*elts)[elts_n] =
+ rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, rxq->socket);
if (elts == NULL) {
+ elts_n = 0;
rte_errno = ENOMEM;
ERROR("%p: can't allocate packets array", (void *)rxq);
goto error;
}
- /* For each WR (packet). */
- for (i = 0; (i != elts_n); ++i) {
- struct rxq_elt *elt = &(*elts)[i];
- struct ibv_recv_wr *wr = &elt->wr;
- struct ibv_sge *sge = &(*elts)[i].sge;
- struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);
+ rxq->elts = elts;
+ for (i = 0; i != elts_n; ++i) {
+ struct rte_mbuf *buf;
+ volatile struct mlx4_wqe_data_seg *scat =
+ &(*rxq->hw.wqes)[i];
+ buf = rte_pktmbuf_alloc(rxq->mp);
if (buf == NULL) {
rte_errno = ENOMEM;
ERROR("%p: empty mbuf pool", (void *)rxq);
goto error;
}
- elt->buf = buf;
- wr->next = &(*elts)[(i + 1)].wr;
- wr->sg_list = sge;
- wr->num_sge = 1;
/* Headroom is reserved by rte_pktmbuf_alloc(). */
assert(buf->data_off == RTE_PKTMBUF_HEADROOM);
/* Buffer is supposed to be empty. */
assert(rte_pktmbuf_data_len(buf) == 0);
assert(rte_pktmbuf_pkt_len(buf) == 0);
- /* sge->addr must be able to store a pointer. */
- assert(sizeof(sge->addr) >= sizeof(uintptr_t));
- /* SGE keeps its headroom. */
- sge->addr = (uintptr_t)
- ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
- sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
- sge->lkey = rxq->mr->lkey;
- /* Redundant check for tailroom. */
- assert(sge->length == rte_pktmbuf_tailroom(buf));
+ assert(!buf->next);
+ /* Only the first segment keeps headroom. */
+ if (i % sge_n)
+ buf->data_off = 0;
+ buf->port = rxq->port_id;
+ buf->data_len = rte_pktmbuf_tailroom(buf);
+ buf->pkt_len = rte_pktmbuf_tailroom(buf);
+ buf->nb_segs = 1;
+ /* scat->addr must be able to store a pointer. */
+ assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+ *scat = (struct mlx4_wqe_data_seg){
+ .addr =
+ rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)),
+ .byte_count = rte_cpu_to_be_32(buf->data_len),
+ .lkey = rte_cpu_to_be_32(rxq->mr->lkey),
+ };
+ (*rxq->elts)[i] = buf;
}
- /* The last WR pointer must be NULL. */
- (*elts)[(i - 1)].wr.next = NULL;
- DEBUG("%p: allocated and configured %u single-segment WRs",
- (void *)rxq, elts_n);
- rxq->elts_n = elts_n;
- rxq->elts_head = 0;
- rxq->elts = elts;
+ DEBUG("%p: allocated and configured %u segments (max %u packets)",
+ (void *)rxq, elts_n, elts_n >> rxq->sge_n);
+ rxq->elts_n = log2above(elts_n);
return 0;
error:
- if (elts != NULL) {
- for (i = 0; (i != RTE_DIM(*elts)); ++i)
- rte_pktmbuf_free_seg((*elts)[i].buf);
- rte_free(elts);
+ for (i = 0; i != elts_n; ++i) {
+ if ((*rxq->elts)[i] != NULL)
+ rte_pktmbuf_free_seg((*rxq->elts)[i]);
+ (*rxq->elts)[i] = NULL;
}
+ rte_free(rxq->elts);
+ rxq->elts = NULL;
DEBUG("%p: failed, freed everything", (void *)rxq);
assert(rte_errno > 0);
return -rte_errno;
@@ -146,17 +150,18 @@
mlx4_rxq_free_elts(struct rxq *rxq)
{
unsigned int i;
- unsigned int elts_n = rxq->elts_n;
- struct rxq_elt (*elts)[elts_n] = rxq->elts;
DEBUG("%p: freeing WRs", (void *)rxq);
+ if (rxq->elts == NULL)
+ return;
+
+ for (i = 0; i != (1u << rxq->elts_n); ++i) {
+ if ((*rxq->elts)[i] != NULL)
+ rte_pktmbuf_free_seg((*rxq->elts)[i]);
+ }
+ rte_free(rxq->elts);
rxq->elts_n = 0;
rxq->elts = NULL;
- if (elts == NULL)
- return;
- for (i = 0; (i != RTE_DIM(*elts)); ++i)
- rte_pktmbuf_free_seg((*elts)[i].buf);
- rte_free(elts);
}
/**
@@ -198,7 +203,8 @@
* QP pointer or NULL in case of error and rte_errno is set.
*/
static struct ibv_qp *
-mlx4_rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc)
+mlx4_rxq_setup_qp(struct priv *priv, struct ibv_cq *cq,
+ uint16_t desc, unsigned int sge_n)
{
struct ibv_qp *qp;
struct ibv_qp_init_attr attr = {
@@ -212,7 +218,7 @@
priv->device_attr.max_qp_wr :
desc),
/* Max number of scatter/gather elements in a WR. */
- .max_recv_sge = 1,
+ .max_recv_sge = sge_n,
},
.qp_type = IBV_QPT_RAW_PACKET,
};
@@ -248,32 +254,43 @@
struct rte_mempool *mp)
{
struct priv *priv = dev->data->dev_private;
+ struct mlx4dv_obj mlxdv;
+ struct mlx4dv_qp dv_qp;
+ struct mlx4dv_cq dv_cq;
struct rxq tmpl = {
.priv = priv,
.mp = mp,
.socket = socket
};
struct ibv_qp_attr mod;
- struct ibv_recv_wr *bad_wr;
unsigned int mb_len;
int ret;
(void)conf; /* Thresholds configuration (ignored). */
mb_len = rte_pktmbuf_data_room_size(mp);
- if (desc == 0) {
- rte_errno = EINVAL;
- ERROR("%p: invalid number of Rx descriptors", (void *)dev);
- goto error;
- }
/* Enable scattered packets support for this queue if necessary. */
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
(mb_len - RTE_PKTMBUF_HEADROOM)) {
- ;
+ tmpl.sge_n = 0;
} else if (dev->data->dev_conf.rxmode.enable_scatter) {
- WARN("%p: scattered mode has been requested but is"
- " not supported, this may lead to packet loss",
- (void *)dev);
+ unsigned int sges_n;
+ unsigned int rx_pkt_len =
+ dev->data->dev_conf.rxmode.jumbo_frame ?
+ dev->data->dev_conf.rxmode.max_rx_pkt_len :
+ ETHER_MTU;
+
+ if (rx_pkt_len < ETHER_MTU)
+ rx_pkt_len = ETHER_MTU;
+ /* Only the first mbuf has a headroom */
+ rx_pkt_len = rx_pkt_len - mb_len + RTE_PKTMBUF_HEADROOM;
+ /*
+ * Determine the number of SGEs needed for a full packet
+ * and round it to the next power of two.
+ */
+ sges_n = (rx_pkt_len / mb_len) + !!(rx_pkt_len % mb_len) + 1;
+ tmpl.sge_n = log2above(sges_n);
+ desc >>= tmpl.sge_n;
} else {
WARN("%p: the requested maximum Rx packet size (%u) is"
" larger than a single mbuf (%u) and scattered"
@@ -282,6 +299,8 @@
dev->data->dev_conf.rxmode.max_rx_pkt_len,
mb_len - RTE_PKTMBUF_HEADROOM);
}
+ DEBUG("%p: number of sges %u (%u WRs)",
+ (void *)dev, 1 << tmpl.sge_n, desc);
/* Use the entire Rx mempool as the memory region. */
tmpl.mr = mlx4_mp2mr(priv->pd, mp);
if (tmpl.mr == NULL) {
@@ -317,7 +336,7 @@
priv->device_attr.max_qp_wr);
DEBUG("priv->device_attr.max_sge is %d",
priv->device_attr.max_sge);
- tmpl.qp = mlx4_rxq_setup_qp(priv, tmpl.cq, desc);
+ tmpl.qp = mlx4_rxq_setup_qp(priv, tmpl.cq, desc, 1 << tmpl.sge_n);
if (tmpl.qp == NULL) {
ERROR("%p: QP creation failure: %s",
(void *)dev, strerror(rte_errno));
@@ -336,21 +355,6 @@
(void *)dev, strerror(rte_errno));
goto error;
}
- ret = mlx4_rxq_alloc_elts(&tmpl, desc);
- if (ret) {
- ERROR("%p: RXQ allocation failed: %s",
- (void *)dev, strerror(rte_errno));
- goto error;
- }
- ret = ibv_post_recv(tmpl.qp, &(*tmpl.elts)[0].wr, &bad_wr);
- if (ret) {
- rte_errno = ret;
- ERROR("%p: ibv_post_recv() failed for WR %p: %s",
- (void *)dev,
- (void *)bad_wr,
- strerror(rte_errno));
- goto error;
- }
mod = (struct ibv_qp_attr){
.qp_state = IBV_QPS_RTR
};
@@ -361,14 +365,44 @@
(void *)dev, strerror(rte_errno));
goto error;
}
+ /* Get HW depended info */
+ mlxdv.cq.in = tmpl.cq;
+ mlxdv.cq.out = &dv_cq;
+ mlxdv.qp.in = tmpl.qp;
+ mlxdv.qp.out = &dv_qp;
+ ret = mlx4dv_init_obj(&mlxdv, MLX4DV_OBJ_QP | MLX4DV_OBJ_CQ);
+ if (ret) {
+ ERROR("%p: Failed to retrieve device obj info", (void *)dev);
+ goto error;
+ }
+ /* Init HW depended fields */
+ tmpl.hw.wqes =
+ (volatile struct mlx4_wqe_data_seg (*)[])
+ ((char *)dv_qp.buf.buf + dv_qp.rq.offset);
+ tmpl.hw.rq_db = dv_qp.rdb;
+ tmpl.hw.rq_ci = 0;
+ tmpl.mcq.buf = dv_cq.buf.buf;
+ tmpl.mcq.cqe_cnt = dv_cq.cqe_cnt;
+ tmpl.mcq.set_ci_db = dv_cq.set_ci_db;
+ tmpl.mcq.cqe_64 = (dv_cq.cqe_size & 64) ? 1 : 0;
/* Save port ID. */
tmpl.port_id = dev->data->port_id;
DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
+ ret = mlx4_rxq_alloc_elts(&tmpl, desc << tmpl.sge_n);
+ if (ret) {
+ ERROR("%p: RXQ allocation failed: %s",
+ (void *)dev, strerror(rte_errno));
+ goto error;
+ }
/* Clean up rxq in case we're reinitializing it. */
DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
mlx4_rxq_cleanup(rxq);
*rxq = tmpl;
DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl);
+ /* Update doorbell counter. */
+ rxq->hw.rq_ci = desc;
+ rte_wmb();
+ *rxq->hw.rq_db = rte_cpu_to_be_32(rxq->hw.rq_ci);
return 0;
error:
ret = rte_errno;
@@ -406,6 +440,12 @@
struct rxq *rxq = dev->data->rx_queues[idx];
int ret;
+ if (!rte_is_power_of_2(desc)) {
+ desc = 1 << log2above(desc);
+ WARN("%p: increased number of descriptors in RX queue %u"
+ " to the next power of two (%d)",
+ (void *)dev, idx, desc);
+ }
DEBUG("%p: configuring queue %u for %u descriptors",
(void *)dev, idx, desc);
if (idx >= dev->data->nb_rx_queues) {
diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 55c8e9a..e45bb3b 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -521,9 +521,45 @@
}
/**
- * DPDK callback for Rx.
+ * Poll one CQE from CQ.
*
- * The following function doesn't manage scattered packets.
+ * @param rxq
+ * Pointer to the receive queue structure.
+ * @param[out] out
+ * Just polled CQE.
+ *
+ * @return
+ * Number of bytes of the CQE, 0 in case there is no completion.
+ */
+static unsigned int
+mlx4_cq_poll_one(struct rxq *rxq,
+ struct mlx4_cqe **out)
+{
+ int ret = 0;
+ struct mlx4_cqe *cqe = NULL;
+ struct mlx4_cq *cq = &rxq->mcq;
+
+ cqe = (struct mlx4_cqe *)mlx4_get_cqe(cq, cq->cons_index);
+ if (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ !!(cq->cons_index & cq->cqe_cnt))
+ goto out;
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ rte_rmb();
+ assert(!(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK));
+ assert((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) !=
+ MLX4_CQE_OPCODE_ERROR);
+ ret = rte_be_to_cpu_32(cqe->byte_cnt);
+ ++cq->cons_index;
+out:
+ *out = cqe;
+ return ret;
+}
+
+/**
+ * DPDK callback for RX with scattered packets support.
*
* @param dpdk_rxq
* Generic pointer to Rx queue structure.
@@ -538,112 +574,109 @@
uint16_t
mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct rxq *rxq = (struct rxq *)dpdk_rxq;
- struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts;
- const unsigned int elts_n = rxq->elts_n;
- unsigned int elts_head = rxq->elts_head;
- struct ibv_wc wcs[pkts_n];
- struct ibv_recv_wr *wr_head = NULL;
- struct ibv_recv_wr **wr_next = &wr_head;
- struct ibv_recv_wr *wr_bad = NULL;
- unsigned int i;
- unsigned int pkts_ret = 0;
- int ret;
+ struct rxq *rxq = dpdk_rxq;
+ const unsigned int wr_cnt = (1 << rxq->elts_n) - 1;
+ const unsigned int sge_n = rxq->sge_n;
+ struct rte_mbuf *pkt = NULL;
+ struct rte_mbuf *seg = NULL;
+ unsigned int i = 0;
+ unsigned int rq_ci = (rxq->hw.rq_ci << sge_n);
+ int len = 0;
- ret = ibv_poll_cq(rxq->cq, pkts_n, wcs);
- if (unlikely(ret == 0))
- return 0;
- if (unlikely(ret < 0)) {
- DEBUG("rxq=%p, ibv_poll_cq() failed (wc_n=%d)",
- (void *)rxq, ret);
- return 0;
- }
- assert(ret <= (int)pkts_n);
- /* For each work completion. */
- for (i = 0; i != (unsigned int)ret; ++i) {
- struct ibv_wc *wc = &wcs[i];
- struct rxq_elt *elt = &(*elts)[elts_head];
- struct ibv_recv_wr *wr = &elt->wr;
- uint32_t len = wc->byte_len;
- struct rte_mbuf *seg = elt->buf;
- struct rte_mbuf *rep;
+ while (pkts_n) {
+ struct mlx4_cqe *cqe;
+ unsigned int idx = rq_ci & wr_cnt;
+ struct rte_mbuf *rep = (*rxq->elts)[idx];
+ volatile struct mlx4_wqe_data_seg *scat =
+ &(*rxq->hw.wqes)[idx];
- /* Sanity checks. */
- assert(wr->sg_list == &elt->sge);
- assert(wr->num_sge == 1);
- assert(elts_head < rxq->elts_n);
- assert(rxq->elts_head < rxq->elts_n);
- /*
- * Fetch initial bytes of packet descriptor into a
- * cacheline while allocating rep.
- */
- rte_mbuf_prefetch_part1(seg);
- rte_mbuf_prefetch_part2(seg);
- /* Link completed WRs together for repost. */
- *wr_next = wr;
- wr_next = &wr->next;
- if (unlikely(wc->status != IBV_WC_SUCCESS)) {
- /* Whatever, just repost the offending WR. */
- DEBUG("rxq=%p: bad work completion status (%d): %s",
- (void *)rxq, wc->status,
- ibv_wc_status_str(wc->status));
- /* Increment dropped packets counter. */
- ++rxq->stats.idropped;
- goto repost;
- }
+ /* Update the 'next' pointer of the previous segment. */
+ if (pkt)
+ seg->next = rep;
+ seg = rep;
+ rte_prefetch0(seg);
+ rte_prefetch0(scat);
rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
- /*
- * Unable to allocate a replacement mbuf,
- * repost WR.
- */
- DEBUG("rxq=%p: can't allocate a new mbuf",
- (void *)rxq);
- /* Increase out of memory counters. */
++rxq->stats.rx_nombuf;
- ++rxq->priv->dev->data->rx_mbuf_alloc_failed;
- goto repost;
+ if (!pkt) {
+ /*
+ * No buffers before we even started,
+ * bail out silently.
+ */
+ break;
+ }
+ while (pkt != seg) {
+ assert(pkt != (*rxq->elts)[idx]);
+ rep = pkt->next;
+ pkt->next = NULL;
+ pkt->nb_segs = 1;
+ rte_mbuf_raw_free(pkt);
+ pkt = rep;
+ }
+ break;
+ }
+ if (!pkt) {
+ /* Looking for the new packet */
+ len = mlx4_cq_poll_one(rxq, &cqe);
+ if (!len) {
+ rte_mbuf_raw_free(rep);
+ break;
+ }
+ if (unlikely(len < 0)) {
+ /* RX error, packet is likely too large. */
+ rte_mbuf_raw_free(rep);
+ ++rxq->stats.idropped;
+ goto skip;
+ }
+ pkt = seg;
+ pkt->packet_type = 0;
+ pkt->ol_flags = 0;
+ pkt->pkt_len = len;
}
- /* Reconfigure sge to use rep instead of seg. */
- elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
- assert(elt->sge.lkey == rxq->mr->lkey);
- elt->buf = rep;
- /* Update seg information. */
- seg->data_off = RTE_PKTMBUF_HEADROOM;
- seg->nb_segs = 1;
- seg->port = rxq->port_id;
- seg->next = NULL;
- seg->pkt_len = len;
+ rep->nb_segs = 1;
+ rep->port = rxq->port_id;
+ rep->data_len = seg->data_len;
+ rep->data_off = seg->data_off;
+ (*rxq->elts)[idx] = rep;
+ /*
+ * Fill NIC descriptor with the new buffer. The lkey and size
+ * of the buffers are already known, only the buffer address
+ * changes.
+ */
+ scat->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
+ if (len > seg->data_len) {
+ len -= seg->data_len;
+ ++pkt->nb_segs;
+ ++rq_ci;
+ continue;
+ }
+ /* The last segment. */
seg->data_len = len;
- seg->packet_type = 0;
- seg->ol_flags = 0;
+ /* Increment bytes counter. */
+ rxq->stats.ibytes += pkt->pkt_len;
/* Return packet. */
- *(pkts++) = seg;
- ++pkts_ret;
- /* Increase bytes counter. */
- rxq->stats.ibytes += len;
-repost:
- if (++elts_head >= elts_n)
- elts_head = 0;
- continue;
+ *(pkts++) = pkt;
+ pkt = NULL;
+ --pkts_n;
+ ++i;
+skip:
+ /* Align consumer index to the next stride. */
+ rq_ci >>= sge_n;
+ ++rq_ci;
+ rq_ci <<= sge_n;
}
- if (unlikely(i == 0))
+ if (unlikely((i == 0) && ((rq_ci >> sge_n) == rxq->hw.rq_ci)))
return 0;
- /* Repost WRs. */
- *wr_next = NULL;
- assert(wr_head);
- ret = ibv_post_recv(rxq->qp, wr_head, &wr_bad);
- if (unlikely(ret)) {
- /* Inability to repost WRs is fatal. */
- DEBUG("%p: recv_burst(): failed (ret=%d)",
- (void *)rxq->priv,
- ret);
- abort();
- }
- rxq->elts_head = elts_head;
- /* Increase packets counter. */
- rxq->stats.ipackets += pkts_ret;
- return pkts_ret;
+ /* Update the consumer index. */
+ rxq->hw.rq_ci = rq_ci >> sge_n;
+ rte_wmb();
+ *rxq->hw.rq_db = rte_cpu_to_be_32(rxq->hw.rq_ci);
+ *rxq->mcq.set_ci_db =
+ rte_cpu_to_be_32(rxq->mcq.cons_index & 0xffffff);
+ /* Increment packets counter. */
+ rxq->stats.ipackets += i;
+ return i;
}
/**
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index b515472..df83552 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -62,13 +62,6 @@ struct mlx4_rxq_stats {
uint64_t rx_nombuf; /**< Total of Rx mbuf allocation failures. */
};
-/** Rx element. */
-struct rxq_elt {
- struct ibv_recv_wr wr; /**< Work request. */
- struct ibv_sge sge; /**< Scatter/gather element. */
- struct rte_mbuf *buf; /**< Buffer. */
-};
-
/** Rx queue descriptor. */
struct rxq {
struct priv *priv; /**< Back pointer to private data. */
@@ -78,9 +71,15 @@ struct rxq {
struct ibv_qp *qp; /**< Queue pair. */
struct ibv_comp_channel *channel; /**< Rx completion channel. */
unsigned int port_id; /**< Port ID for incoming packets. */
- unsigned int elts_n; /**< (*elts)[] length. */
- unsigned int elts_head; /**< Current index in (*elts)[]. */
- struct rxq_elt (*elts)[]; /**< Rx elements. */
+ unsigned int elts_n; /**< Log 2 of Mbufs. */
+ struct rte_mbuf *(*elts)[]; /**< Rx elements. */
+ struct {
+ volatile struct mlx4_wqe_data_seg(*wqes)[];
+ volatile uint32_t *rq_db;
+ uint16_t rq_ci;
+ } hw;
+ struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */
+ unsigned int sge_n; /**< Log 2 of SGEs number. */
struct mlx4_rxq_stats stats; /**< Rx queue counters. */
unsigned int socket; /**< CPU socket ID for allocations. */
};
diff --git a/drivers/net/mlx4/mlx4_utils.h b/drivers/net/mlx4/mlx4_utils.h
index 0fbdc71..d6f729f 100644
--- a/drivers/net/mlx4/mlx4_utils.h
+++ b/drivers/net/mlx4/mlx4_utils.h
@@ -108,4 +108,24 @@
int mlx4_fd_set_non_blocking(int fd);
+/**
+ * Return nearest power of two above input value.
+ *
+ * @param v
+ * Input value.
+ *
+ * @return
+ * Nearest power of two above input value.
+ */
+static inline unsigned int
+log2above(unsigned int v)
+{
+ unsigned int l;
+ unsigned int r;
+
+ for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
+ r |= (v & 1);
+ return l + r;
+}
+
#endif /* MLX4_UTILS_H_ */
--
1.8.3.1
next prev parent reply other threads:[~2017-10-03 10:50 UTC|newest]
Thread overview: 61+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-24 15:54 [dpdk-dev] [PATCH 0/5] new mlx4 Tx datapath bypassing ibverbs Moti Haimovsky
2017-08-24 15:54 ` [dpdk-dev] [PATCH 1/5] net/mlx4: add simple Tx " Moti Haimovsky
2017-08-24 15:54 ` [dpdk-dev] [PATCH 2/5] net/mlx4: support multi-segments Tx Moti Haimovsky
2017-08-24 15:54 ` [dpdk-dev] [PATCH 3/5] net/mlx4: refine setting Tx completion flag Moti Haimovsky
2017-08-24 15:54 ` [dpdk-dev] [PATCH 4/5] net/mlx4: add Tx checksum offloads Moti Haimovsky
2017-08-24 15:54 ` [dpdk-dev] [PATCH 5/5] net/mlx4: add loopback Tx from VF Moti Haimovsky
2017-10-03 10:48 ` [dpdk-dev] [PATCH v2 0/6] new mlx4 datapath bypassing ibverbs Matan Azrad
2017-10-03 10:48 ` [dpdk-dev] [PATCH v2 1/6] net/mlx4: add simple Tx " Matan Azrad
2017-10-03 10:48 ` Matan Azrad [this message]
2017-10-03 10:48 ` [dpdk-dev] [PATCH v2 3/6] net/mlx4: support multi-segments Tx Matan Azrad
2017-10-03 10:48 ` [dpdk-dev] [PATCH v2 4/6] net/mlx4: get back Tx checksum offloads Matan Azrad
2017-10-03 10:48 ` [dpdk-dev] [PATCH v2 5/6] net/mlx4: get back Rx " Matan Azrad
2017-10-03 22:26 ` Ferruh Yigit
2017-10-03 10:48 ` [dpdk-dev] [PATCH v2 6/6] net/mlx4: add loopback Tx from VF Matan Azrad
2017-10-03 22:27 ` [dpdk-dev] [PATCH v2 0/6] new mlx4 datapath bypassing ibverbs Ferruh Yigit
2017-10-04 18:48 ` [dpdk-dev] [PATCH v3 " Adrien Mazarguil
2017-10-04 18:48 ` [dpdk-dev] [PATCH v3 1/6] net/mlx4: add simple Tx bypassing Verbs Adrien Mazarguil
2017-10-04 18:48 ` [dpdk-dev] [PATCH v3 2/6] net/mlx4: restore full Rx support " Adrien Mazarguil
2017-10-04 18:48 ` [dpdk-dev] [PATCH v3 3/6] net/mlx4: restore Tx gather support Adrien Mazarguil
2017-10-04 18:48 ` [dpdk-dev] [PATCH v3 4/6] net/mlx4: restore Tx checksum offloads Adrien Mazarguil
2017-10-04 18:48 ` [dpdk-dev] [PATCH v3 5/6] net/mlx4: restore Rx offloads Adrien Mazarguil
2017-10-04 18:48 ` [dpdk-dev] [PATCH v3 6/6] net/mlx4: add loopback Tx from VF Adrien Mazarguil
2017-10-05 9:33 ` [dpdk-dev] [PATCH v4 0/7] new mlx4 datapath bypassing ibverbs Ophir Munk
2017-10-05 9:33 ` [dpdk-dev] [PATCH v4 1/7] net/mlx4: add simple Tx bypassing Verbs Ophir Munk
2017-10-05 9:33 ` [dpdk-dev] [PATCH v4 2/7] net/mlx4: restore full Rx support " Ophir Munk
2017-10-05 9:33 ` [dpdk-dev] [PATCH v4 3/7] net/mlx4: restore Rx scatter support Ophir Munk
2017-10-05 9:33 ` [dpdk-dev] [PATCH v4 4/7] net/mlx4: restore Tx gather support Ophir Munk
2017-10-05 9:33 ` [dpdk-dev] [PATCH v4 5/7] net/mlx4: restore Tx checksum offloads Ophir Munk
2017-10-05 9:33 ` [dpdk-dev] [PATCH v4 6/7] net/mlx4: restore Rx offloads Ophir Munk
2017-10-05 9:33 ` [dpdk-dev] [PATCH v4 7/7] net/mlx4: add loopback Tx from VF Ophir Munk
2017-10-05 11:40 ` [dpdk-dev] [PATCH v4 0/7] new mlx4 datapath bypassing ibverbs Adrien Mazarguil
2017-10-05 18:48 ` Ferruh Yigit
2017-10-05 18:54 ` Ferruh Yigit
2017-10-11 18:31 ` [dpdk-dev] [PATCH v5 0/5] " Adrien Mazarguil
2017-10-11 18:31 ` [dpdk-dev] [PATCH v5 1/5] net/mlx4: add Tx bypassing Verbs Adrien Mazarguil
2017-10-11 18:31 ` [dpdk-dev] [PATCH v5 2/5] net/mlx4: add Rx " Adrien Mazarguil
2017-10-11 18:32 ` [dpdk-dev] [PATCH v5 3/5] net/mlx4: restore Tx checksum offloads Adrien Mazarguil
2017-10-11 18:32 ` [dpdk-dev] [PATCH v5 4/5] net/mlx4: restore Rx offloads Adrien Mazarguil
2017-10-11 18:32 ` [dpdk-dev] [PATCH v5 5/5] net/mlx4: add loopback Tx from VF Adrien Mazarguil
2017-10-12 12:29 ` [dpdk-dev] [PATCH v6 0/5] new mlx4 datapath bypassing ibverbs Adrien Mazarguil
2017-10-12 12:29 ` [dpdk-dev] [PATCH v6 1/5] net/mlx4: add Tx bypassing Verbs Adrien Mazarguil
2017-10-12 12:29 ` [dpdk-dev] [PATCH v6 2/5] net/mlx4: add Rx " Adrien Mazarguil
2017-10-12 12:29 ` [dpdk-dev] [PATCH v6 3/5] net/mlx4: restore Tx checksum offloads Adrien Mazarguil
2017-10-12 12:29 ` [dpdk-dev] [PATCH v6 4/5] net/mlx4: restore Rx offloads Adrien Mazarguil
2017-10-12 12:30 ` [dpdk-dev] [PATCH v6 5/5] net/mlx4: add loopback Tx from VF Adrien Mazarguil
2017-10-24 6:29 ` [dpdk-dev] [PATCH v6 0/5] new mlx4 datapath bypassing ibverbs gowrishankar muthukrishnan
2017-10-24 8:49 ` gowrishankar muthukrishnan
2017-10-24 9:55 ` Nélio Laranjeiro
2017-10-24 10:01 ` Adrien Mazarguil
2017-10-24 16:59 ` Ferruh Yigit
2017-10-04 21:48 ` [dpdk-dev] [PATCH v3 0/7] " Ophir Munk
2017-10-04 21:49 ` [dpdk-dev] [PATCH v3 1/7] net/mlx4: add simple Tx " Ophir Munk
2017-10-04 21:49 ` [dpdk-dev] [PATCH v3 2/7] net/mlx4: get back Rx flow functionality Ophir Munk
2017-10-04 21:49 ` [dpdk-dev] [PATCH v3 3/7] net/mlx4: support multi-segments Rx Ophir Munk
2017-10-04 21:49 ` [dpdk-dev] [PATCH v3 4/7] net/mlx4: support multi-segments Tx Ophir Munk
2017-10-04 21:49 ` [dpdk-dev] [PATCH v3 5/7] net/mlx4: get back Tx checksum offloads Ophir Munk
2017-10-04 21:49 ` [dpdk-dev] [PATCH v3 6/7] net/mlx4: get back Rx " Ophir Munk
2017-10-04 21:49 ` [dpdk-dev] [PATCH v3 7/7] net/mlx4: add loopback Tx from VF Ophir Munk
2017-10-04 22:37 ` [dpdk-dev] [PATCH v3 0/7] new mlx4 datapath bypassing ibverbs Ferruh Yigit
2017-10-04 22:46 ` Thomas Monjalon
2017-10-24 11:56 ` [dpdk-dev] [PATCH 0/5] new mlx4 Tx " Nélio Laranjeiro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1507027711-879-3-git-send-email-matan@mellanox.com \
--to=matan@mellanox.com \
--cc=adrien.mazarguil@6wind.com \
--cc=dev@dpdk.org \
--cc=motih@mellanox.com \
--cc=vasilyf@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).