From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wg0-f50.google.com (mail-wg0-f50.google.com [74.125.82.50]) by dpdk.org (Postfix) with ESMTP id 3BD0BC422 for ; Tue, 30 Jun 2015 11:29:00 +0200 (CEST) Received: by wgjx7 with SMTP id x7so4217952wgj.2 for ; Tue, 30 Jun 2015 02:29:00 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=8K/cX1PsVaaYD5jikpQ6csGWMO19XvhpUszgoSrx280=; b=mQmHu5NHMKI+WrZq8B9YpLwgA0SsIVaGC2Y6uoSLuukS912RQ1OagjMf/GnVRnD+a/ xs9LjHA4c0V5KEoY6nYPpU0+8wXv+BbRUNlklB2KcAvl3PpdaajwK4+n3QvH8hUM3FEi bYzrC16jhXz9Ac7Xc+sWxArtCHWF1KPdhMWHGVHAthfE3bMonSDdJNUv069BNriagf/l 3+JHlMb/nLUWoZY2DQ//XYFujpHMRJGniUXoQg6TapkBPFZsv1/I7iYiu1YzdMk4GqKr XLnFhasjoVux2ZtvdVZJdC13V5zc2gvALdKDxw9Luuvwdo7UEHHSYkL1TWZBiUMnu0dE cmaA== X-Gm-Message-State: ALoCoQmHdZxgeIH41VS9LcAz9ctgoH5jwTAFj1d/rBvXshAszGZxJrogSyIRP4wF91Jwerj8A4P4 X-Received: by 10.195.11.202 with SMTP id ek10mr38135668wjd.12.1435656540028; Tue, 30 Jun 2015 02:29:00 -0700 (PDT) Received: from 6wind.com (6wind.net2.nerim.net. [213.41.151.210]) by mx.google.com with ESMTPSA id um5sm67818199wjc.1.2015.06.30.02.28.58 (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Tue, 30 Jun 2015 02:28:59 -0700 (PDT) From: Adrien Mazarguil To: dev@dpdk.org Date: Tue, 30 Jun 2015 11:28:00 +0200 Message-Id: <1435656489-27986-15-git-send-email-adrien.mazarguil@6wind.com> X-Mailer: git-send-email 2.1.0 In-Reply-To: <1435656489-27986-1-git-send-email-adrien.mazarguil@6wind.com> References: <1433546120-2254-1-git-send-email-adrien.mazarguil@6wind.com> <1435656489-27986-1-git-send-email-adrien.mazarguil@6wind.com> Cc: Alex Rosenbaum Subject: [dpdk-dev] [PATCH v2 14/23] mlx4: improve performance by requesting TX completion events less often X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 30 Jun 2015 09:29:00 -0000 From: Alex Rosenbaum Instead of requesting a completion event for each TX burst, request it on a fixed schedule once every MLX4_PMD_TX_PER_COMP_REQ (currently 64) packets to improve performance. Signed-off-by: Alex Rosenbaum Signed-off-by: Adrien Mazarguil --- drivers/net/mlx4/mlx4.c | 54 ++++++++++++++++++++++++++++++++----------------- drivers/net/mlx4/mlx4.h | 3 +++ 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index 1881f5b..f76f415 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -243,6 +243,8 @@ struct txq { unsigned int elts_head; /* Current index in (*elts)[]. */ unsigned int elts_tail; /* First element awaiting completion. */ unsigned int elts_comp; /* Number of completion requests. */ + unsigned int elts_comp_cd; /* Countdown for next completion request. */ + unsigned int elts_comp_cd_init; /* Initial value for countdown. */ struct mlx4_txq_stats stats; /* TX queue counters. */ linear_t (*elts_linear)[]; /* Linearized buffers. */ struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */ @@ -810,6 +812,12 @@ txq_alloc_elts(struct txq *txq, unsigned int elts_n) txq->elts_head = 0; txq->elts_tail = 0; txq->elts_comp = 0; + /* Request send completion every MLX4_PMD_TX_PER_COMP_REQ packets or + * at least 4 times per ring. */ + txq->elts_comp_cd_init = + ((MLX4_PMD_TX_PER_COMP_REQ < (elts_n / 4)) ? + MLX4_PMD_TX_PER_COMP_REQ : (elts_n / 4)); + txq->elts_comp_cd = txq->elts_comp_cd_init; txq->elts_linear = elts_linear; txq->mr_linear = mr_linear; assert(ret == 0); @@ -896,9 +904,9 @@ txq_cleanup(struct txq *txq) * Manage TX completions. * * When sending a burst, mlx4_tx_burst() posts several WRs. - * To improve performance, a completion event is only required for the last of - * them. Doing so discards completion information for other WRs, but this - * information would not be used anyway. + * To improve performance, a completion event is only required once every + * MLX4_PMD_TX_PER_COMP_REQ sends. Doing so discards completion information + * for other WRs, but this information would not be used anyway. * * @param txq * Pointer to TX queue structure. @@ -910,7 +918,7 @@ static int txq_complete(struct txq *txq) { unsigned int elts_comp = txq->elts_comp; - unsigned int elts_tail; + unsigned int elts_tail = txq->elts_tail; const unsigned int elts_n = txq->elts_n; struct ibv_wc wcs[elts_comp]; int wcs_n; @@ -932,17 +940,12 @@ txq_complete(struct txq *txq) elts_comp -= wcs_n; assert(elts_comp <= txq->elts_comp); /* - * Work Completion ID contains the associated element index in - * (*txq->elts)[]. Since WCs are returned in order, we only need to - * look at the last WC to clear older Work Requests. - * * Assume WC status is successful as nothing can be done about it * anyway. */ - elts_tail = WR_ID(wcs[wcs_n - 1].wr_id).id; - /* Consume the last WC. */ - if (++elts_tail >= elts_n) - elts_tail = 0; + elts_tail += wcs_n * txq->elts_comp_cd_init; + if (elts_tail >= elts_n) + elts_tail -= elts_n; txq->elts_tail = elts_tail; txq->elts_comp = elts_comp; return 0; @@ -1062,10 +1065,13 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int elts_head = txq->elts_head; const unsigned int elts_tail = txq->elts_tail; const unsigned int elts_n = txq->elts_n; + unsigned int elts_comp_cd = txq->elts_comp_cd; + unsigned int elts_comp = 0; unsigned int i; unsigned int max; int err; + assert(elts_comp_cd != 0); txq_complete(txq); max = (elts_n - (elts_head - elts_tail)); if (max > elts_n) @@ -1243,6 +1249,12 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) else #endif wr->send_flags = 0; + /* Request TX completion. */ + if (unlikely(--elts_comp_cd == 0)) { + elts_comp_cd = txq->elts_comp_cd_init; + ++elts_comp; + wr->send_flags |= IBV_SEND_SIGNALED; + } if (++elts_head >= elts_n) elts_head = 0; #ifdef MLX4_PMD_SOFT_COUNTERS @@ -1259,14 +1271,11 @@ stop: txq->stats.opackets += i; #endif *wr_next = NULL; - /* The last WR is the only one asking for a completion event. */ - containerof(wr_next, mlx4_send_wr_t, next)-> - send_flags |= IBV_SEND_SIGNALED; err = mlx4_post_send(txq->qp, head.next, &bad_wr); if (unlikely(err)) { unsigned int unsent = 0; - /* An error occurred, completion event is lost. Fix counters. */ + /* An error occurred, fix counters. */ while (bad_wr != NULL) { struct txq_elt *elt = containerof(bad_wr, struct txq_elt, wr); @@ -1285,6 +1294,14 @@ stop: txq->stats.obytes -= wr->sg_list[j].length; #endif ++unsent; + if (wr->send_flags & IBV_SEND_SIGNALED) { + assert(elts_comp != 0); + --elts_comp; + } + if (elts_comp_cd == txq->elts_comp_cd_init) + elts_comp_cd = 1; + else + ++elts_comp_cd; #ifndef NDEBUG /* For assert(). */ for (j = 0; ((int)j < wr->num_sge); ++j) { @@ -1310,9 +1327,10 @@ stop: DEBUG("%p: mlx4_post_send() failed, %u unprocessed WRs: %s", (void *)txq, unsent, ((err <= -1) ? "Internal error" : strerror(err))); - } else - ++txq->elts_comp; + } txq->elts_head = elts_head; + txq->elts_comp += elts_comp; + txq->elts_comp_cd = elts_comp_cd; return i; } diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 433aa3b..151c34b 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -51,6 +51,9 @@ /* Maximum number of simultaneous VLAN filters supported. See above. */ #define MLX4_MAX_VLAN_IDS 127 +/* Request send completion once in every 64 sends, might be less. */ +#define MLX4_PMD_TX_PER_COMP_REQ 64 + /* Maximum number of Scatter/Gather Elements per Work Request. */ #ifndef MLX4_PMD_SGE_WR_N #define MLX4_PMD_SGE_WR_N 4 -- 2.1.0