From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wg0-f44.google.com (mail-wg0-f44.google.com [74.125.82.44]) by dpdk.org (Postfix) with ESMTP id 90305C40A for ; Tue, 30 Jun 2015 11:28:56 +0200 (CEST) Received: by wguu7 with SMTP id u7so4201370wgu.3 for ; Tue, 30 Jun 2015 02:28:56 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=Anl4GLYUZocCBF9Xc7wCSHbtNAJ7cDbmYdLJptzW5hc=; b=Hp24mmKytOaLx6RXZyXXvwIDzm6TnMzji9esDX13qgCc5jfzMXXTerwdUoh/pvEBup X12GfG1iZUTc1l2OuKC7U4Hk8aMu18lPnJbGg9IOdlSbFsqMwhQUEcZyQeWP7NU6Z0Xp hIK2jHPL4rGRZF/raI/PKyqlt/Kd0oMyi6Sknhf9IVgJPLB0/qwoxD2TTtLT5n0dymts 8LIr56GUc+emfDVxdj++tDCMBrwN7zl80QHW6/yfFbZumcEAWZhZkzTRu+DSs6bBGGNA hDr0DgajtWJRirlxJANWyr38chqKw/lx2G9QDg3yooF7iAZ9JOxFpjjqMb86/W+/lM5y yLYQ== X-Gm-Message-State: ALoCoQnKShebNzDoo0L+KRkiyu0hgTZqIdv1uY+HPC0k156ZlNbw69xCrx9HktyjSVh13G9nv+LB X-Received: by 10.180.79.162 with SMTP id k2mr13738314wix.46.1435656536333; Tue, 30 Jun 2015 02:28:56 -0700 (PDT) Received: from 6wind.com (6wind.net2.nerim.net. 
[213.41.151.210]) by mx.google.com with ESMTPSA id ej5sm67962162wjd.22.2015.06.30.02.28.54 (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Tue, 30 Jun 2015 02:28:55 -0700 (PDT) From: Adrien Mazarguil To: dev@dpdk.org Date: Tue, 30 Jun 2015 11:27:59 +0200 Message-Id: <1435656489-27986-14-git-send-email-adrien.mazarguil@6wind.com> X-Mailer: git-send-email 2.1.0 In-Reply-To: <1435656489-27986-1-git-send-email-adrien.mazarguil@6wind.com> References: <1433546120-2254-1-git-send-email-adrien.mazarguil@6wind.com> <1435656489-27986-1-git-send-email-adrien.mazarguil@6wind.com> Cc: Alex Rosenbaum Subject: [dpdk-dev] [PATCH v2 13/23] mlx4: use MOFED 3.0 fast verbs interface for RX operations X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 30 Jun 2015 09:28:57 -0000 From: Alex Rosenbaum This commit replaces the CQ polling and QP posting functions (mlx4_rx_burst() only) with a new low level interface to improve performance. Signed-off-by: Alex Rosenbaum Signed-off-by: Gilad Berman Signed-off-by: Adrien Mazarguil --- drivers/net/mlx4/mlx4.c | 249 +++++++++++++++++++++++++++++++----------------- 1 file changed, 162 insertions(+), 87 deletions(-) diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index cdc679a..1881f5b 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -188,6 +188,8 @@ struct rxq { struct ibv_mr *mr; /* Memory Region (for mp). */ struct ibv_cq *cq; /* Completion Queue. */ struct ibv_qp *qp; /* Queue Pair. */ + struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */ + struct ibv_exp_cq_family *if_cq; /* CQ interface. */ /* * Each VLAN ID requires a separate flow steering rule. 
*/ @@ -2319,11 +2321,35 @@ rxq_promiscuous_disable(struct rxq *rxq) static void rxq_cleanup(struct rxq *rxq) { + struct ibv_exp_release_intf_params params; + DEBUG("cleaning up %p", (void *)rxq); if (rxq->sp) rxq_free_elts_sp(rxq); else rxq_free_elts(rxq); + if (rxq->if_qp != NULL) { + assert(rxq->priv != NULL); + assert(rxq->priv->ctx != NULL); + assert(rxq->qp != NULL); + params = (struct ibv_exp_release_intf_params){ + .comp_mask = 0, + }; + claim_zero(ibv_exp_release_intf(rxq->priv->ctx, + rxq->if_qp, + &params)); + } + if (rxq->if_cq != NULL) { + assert(rxq->priv != NULL); + assert(rxq->priv->ctx != NULL); + assert(rxq->cq != NULL); + params = (struct ibv_exp_release_intf_params){ + .comp_mask = 0, + }; + claim_zero(ibv_exp_release_intf(rxq->priv->ctx, + rxq->if_cq, + &params)); + } if (rxq->qp != NULL) { rxq_promiscuous_disable(rxq); rxq_allmulticast_disable(rxq); @@ -2360,34 +2386,23 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp; const unsigned int elts_n = rxq->elts_n; unsigned int elts_head = rxq->elts_head; - struct ibv_wc wcs[pkts_n]; struct ibv_recv_wr head; struct ibv_recv_wr **next = &head.next; struct ibv_recv_wr *bad_wr; - int ret = 0; - int wcs_n; - int i; + unsigned int i; + unsigned int pkts_ret = 0; + int ret; if (unlikely(!rxq->sp)) return mlx4_rx_burst(dpdk_rxq, pkts, pkts_n); if (unlikely(elts == NULL)) /* See RTE_DEV_CMD_SET_MTU. */ return 0; - wcs_n = ibv_poll_cq(rxq->cq, pkts_n, wcs); - if (unlikely(wcs_n == 0)) - return 0; - if (unlikely(wcs_n < 0)) { - DEBUG("rxq=%p, ibv_poll_cq() failed (wc_n=%d)", - (void *)rxq, wcs_n); - return 0; - } - assert(wcs_n <= (int)pkts_n); - /* For each work completion. 
*/ - for (i = 0; (i != wcs_n); ++i) { - struct ibv_wc *wc = &wcs[i]; - uint64_t wr_id = wc->wr_id; - uint32_t len = wc->byte_len; + for (i = 0; (i != pkts_n); ++i) { struct rxq_elt_sp *elt = &(*elts)[elts_head]; struct ibv_recv_wr *wr = &elt->wr; + uint64_t wr_id = wr->wr_id; + unsigned int len; + unsigned int pkt_buf_len; struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */ struct rte_mbuf **pkt_buf_next = &pkt_buf; unsigned int seg_headroom = RTE_PKTMBUF_HEADROOM; @@ -2398,26 +2413,51 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) (void)wr_id; #endif assert(wr_id < rxq->elts_n); - assert(wr_id == wr->wr_id); assert(wr->sg_list == elt->sges); assert(wr->num_sge == elemof(elt->sges)); assert(elts_head < rxq->elts_n); assert(rxq->elts_head < rxq->elts_n); - /* Link completed WRs together for repost. */ - *next = wr; - next = &wr->next; - if (unlikely(wc->status != IBV_WC_SUCCESS)) { - /* Whatever, just repost the offending WR. */ - DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work completion" - " status (%d): %s", - (void *)rxq, wc->wr_id, wc->status, - ibv_wc_status_str(wc->status)); + ret = rxq->if_cq->poll_length(rxq->cq, NULL, NULL); + if (unlikely(ret < 0)) { + struct ibv_wc wc; + int wcs_n; + + DEBUG("rxq=%p, poll_length() failed (ret=%d)", + (void *)rxq, ret); + /* ibv_poll_cq() must be used in case of failure. */ + wcs_n = ibv_poll_cq(rxq->cq, 1, &wc); + if (unlikely(wcs_n == 0)) + break; + if (unlikely(wcs_n < 0)) { + DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)", + (void *)rxq, wcs_n); + break; + } + assert(wcs_n == 1); + if (unlikely(wc.status != IBV_WC_SUCCESS)) { + /* Whatever, just repost the offending WR. */ + DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work" + " completion status (%d): %s", + (void *)rxq, wc.wr_id, wc.status, + ibv_wc_status_str(wc.status)); #ifdef MLX4_PMD_SOFT_COUNTERS - /* Increase dropped packets counter. */ - ++rxq->stats.idropped; + /* Increment dropped packets counter. 
*/ + ++rxq->stats.idropped; #endif - goto repost; + /* Link completed WRs together for repost. */ + *next = wr; + next = &wr->next; + goto repost; + } + ret = wc.byte_len; } + if (ret == 0) + break; + len = ret; + pkt_buf_len = len; + /* Link completed WRs together for repost. */ + *next = wr; + next = &wr->next; /* * Replace spent segments with new ones, concatenate and * return them as pkt_buf. @@ -2502,42 +2542,43 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) assert(j != 0); NB_SEGS(pkt_buf) = j; PORT(pkt_buf) = rxq->port_id; - PKT_LEN(pkt_buf) = wc->byte_len; + PKT_LEN(pkt_buf) = pkt_buf_len; pkt_buf->ol_flags = 0; /* Return packet. */ *(pkts++) = pkt_buf; - ++ret; + ++pkts_ret; #ifdef MLX4_PMD_SOFT_COUNTERS /* Increase bytes counter. */ - rxq->stats.ibytes += wc->byte_len; + rxq->stats.ibytes += pkt_buf_len; #endif repost: if (++elts_head >= elts_n) elts_head = 0; continue; } + if (unlikely(i == 0)) + return 0; *next = NULL; /* Repost WRs. */ #ifdef DEBUG_RECV - DEBUG("%p: reposting %d WRs starting from %" PRIu64 " (%p)", - (void *)rxq, wcs_n, wcs[0].wr_id, (void *)head.next); + DEBUG("%p: reposting %d WRs", (void *)rxq, i); #endif - i = ibv_post_recv(rxq->qp, head.next, &bad_wr); - if (unlikely(i)) { + ret = ibv_post_recv(rxq->qp, head.next, &bad_wr); + if (unlikely(ret)) { /* Inability to repost WRs is fatal. */ DEBUG("%p: ibv_post_recv(): failed for WR %p: %s", (void *)rxq->priv, (void *)bad_wr, - strerror(i)); + strerror(ret)); abort(); } rxq->elts_head = elts_head; #ifdef MLX4_PMD_SOFT_COUNTERS /* Increase packets counter. 
*/ - rxq->stats.ipackets += ret; + rxq->stats.ipackets += pkts_ret; #endif - return ret; + return pkts_ret; } /** @@ -2564,58 +2605,64 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp; const unsigned int elts_n = rxq->elts_n; unsigned int elts_head = rxq->elts_head; - struct ibv_wc wcs[pkts_n]; - struct ibv_recv_wr head; - struct ibv_recv_wr **next = &head.next; - struct ibv_recv_wr *bad_wr; - int ret = 0; - int wcs_n; - int i; + struct ibv_sge sges[pkts_n]; + unsigned int i; + unsigned int pkts_ret = 0; + int ret; if (unlikely(rxq->sp)) return mlx4_rx_burst_sp(dpdk_rxq, pkts, pkts_n); - wcs_n = ibv_poll_cq(rxq->cq, pkts_n, wcs); - if (unlikely(wcs_n == 0)) - return 0; - if (unlikely(wcs_n < 0)) { - DEBUG("rxq=%p, ibv_poll_cq() failed (wc_n=%d)", - (void *)rxq, wcs_n); - return 0; - } - assert(wcs_n <= (int)pkts_n); - /* For each work completion. */ - for (i = 0; (i != wcs_n); ++i) { - struct ibv_wc *wc = &wcs[i]; - uint64_t wr_id = wc->wr_id; - uint32_t len = wc->byte_len; + for (i = 0; (i != pkts_n); ++i) { struct rxq_elt *elt = &(*elts)[elts_head]; struct ibv_recv_wr *wr = &elt->wr; + uint64_t wr_id = wr->wr_id; + unsigned int len; struct rte_mbuf *seg = (void *)((uintptr_t)elt->sge.addr - WR_ID(wr_id).offset); struct rte_mbuf *rep; /* Sanity checks. */ assert(WR_ID(wr_id).id < rxq->elts_n); - assert(wr_id == wr->wr_id); assert(wr->sg_list == &elt->sge); assert(wr->num_sge == 1); assert(elts_head < rxq->elts_n); assert(rxq->elts_head < rxq->elts_n); - /* Link completed WRs together for repost. */ - *next = wr; - next = &wr->next; - if (unlikely(wc->status != IBV_WC_SUCCESS)) { - /* Whatever, just repost the offending WR. 
*/ - DEBUG("rxq=%p, wr_id=%" PRIu32 ": bad work completion" - " status (%d): %s", - (void *)rxq, WR_ID(wr_id).id, wc->status, - ibv_wc_status_str(wc->status)); + ret = rxq->if_cq->poll_length(rxq->cq, NULL, NULL); + if (unlikely(ret < 0)) { + struct ibv_wc wc; + int wcs_n; + + DEBUG("rxq=%p, poll_length() failed (ret=%d)", + (void *)rxq, ret); + /* ibv_poll_cq() must be used in case of failure. */ + wcs_n = ibv_poll_cq(rxq->cq, 1, &wc); + if (unlikely(wcs_n == 0)) + break; + if (unlikely(wcs_n < 0)) { + DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)", + (void *)rxq, wcs_n); + break; + } + assert(wcs_n == 1); + if (unlikely(wc.status != IBV_WC_SUCCESS)) { + /* Whatever, just repost the offending WR. */ + DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work" + " completion status (%d): %s", + (void *)rxq, wc.wr_id, wc.status, + ibv_wc_status_str(wc.status)); #ifdef MLX4_PMD_SOFT_COUNTERS - /* Increase dropped packets counter. */ - ++rxq->stats.idropped; + /* Increment dropped packets counter. */ + ++rxq->stats.idropped; #endif - goto repost; + /* Add SGE to array for repost. */ + sges[i] = elt->sge; + goto repost; + } + ret = wc.byte_len; } + if (ret == 0) + break; + len = ret; /* * Fetch initial bytes of packet descriptor into a * cacheline while allocating rep. @@ -2644,6 +2691,9 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) (uintptr_t)rep); assert(WR_ID(wr->wr_id).id == WR_ID(wr_id).id); + /* Add SGE to array for repost. */ + sges[i] = elt->sge; + /* Update seg information. */ SET_DATA_OFF(seg, RTE_PKTMBUF_HEADROOM); NB_SEGS(seg) = 1; @@ -2655,37 +2705,36 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Return packet. */ *(pkts++) = seg; - ++ret; + ++pkts_ret; #ifdef MLX4_PMD_SOFT_COUNTERS /* Increase bytes counter. */ - rxq->stats.ibytes += wc->byte_len; + rxq->stats.ibytes += len; #endif repost: if (++elts_head >= elts_n) elts_head = 0; continue; } - *next = NULL; + if (unlikely(i == 0)) + return 0; /* Repost WRs. 
*/ #ifdef DEBUG_RECV - DEBUG("%p: reposting %d WRs starting from %" PRIu32 " (%p)", - (void *)rxq, wcs_n, WR_ID(wcs[0].wr_id).id, (void *)head.next); + DEBUG("%p: reposting %u WRs", (void *)rxq, i); #endif - i = ibv_post_recv(rxq->qp, head.next, &bad_wr); - if (unlikely(i)) { + ret = rxq->if_qp->recv_burst(rxq->qp, sges, i); + if (unlikely(ret)) { /* Inability to repost WRs is fatal. */ - DEBUG("%p: ibv_post_recv(): failed for WR %p: %s", + DEBUG("%p: recv_burst(): failed (ret=%d)", (void *)rxq->priv, - (void *)bad_wr, - strerror(i)); + ret); abort(); } rxq->elts_head = elts_head; #ifdef MLX4_PMD_SOFT_COUNTERS /* Increase packets counter. */ - rxq->stats.ipackets += ret; + rxq->stats.ipackets += pkts_ret; #endif - return ret; + return pkts_ret; } /** @@ -3019,6 +3068,10 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, .socket = socket }; struct ibv_exp_qp_attr mod; + union { + struct ibv_exp_query_intf_params params; + } attr; + enum ibv_exp_query_intf_status status; struct ibv_recv_wr *bad_wr; struct rte_mbuf *buf; int ret = 0; @@ -3160,6 +3213,28 @@ skip_alloc: /* Save port ID. */ tmpl.port_id = dev->data->port_id; DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id); + attr.params = (struct ibv_exp_query_intf_params){ + .intf_scope = IBV_EXP_INTF_GLOBAL, + .intf = IBV_EXP_INTF_CQ, + .obj = tmpl.cq, + }; + tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status); + if (tmpl.if_cq == NULL) { + ERROR("%p: CQ interface family query failed with status %d", + (void *)dev, status); + goto error; + } + attr.params = (struct ibv_exp_query_intf_params){ + .intf_scope = IBV_EXP_INTF_GLOBAL, + .intf = IBV_EXP_INTF_QP_BURST, + .obj = tmpl.qp, + }; + tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status); + if (tmpl.if_qp == NULL) { + ERROR("%p: QP interface family query failed with status %d", + (void *)dev, status); + goto error; + } /* Clean up rxq in case we're reinitializing it. 
*/ DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq); rxq_cleanup(rxq); -- 2.1.0