From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <nelio.laranjeiro@6wind.com>
Received: from mail-wm0-f49.google.com (mail-wm0-f49.google.com [74.125.82.49])
 by dpdk.org (Postfix) with ESMTP id 0B45FC5E8
 for <dev@dpdk.org>; Thu, 23 Jun 2016 19:06:10 +0200 (CEST)
Received: by mail-wm0-f49.google.com with SMTP id v199so134669006wmv.0
 for <dev@dpdk.org>; Thu, 23 Jun 2016 10:06:10 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 d=6wind-com.20150623.gappssmtp.com; s=20150623;
 h=from:to:cc:subject:date:message-id:in-reply-to:references;
 bh=dNw0yETYX0yXQwyzF4bAnrmNiFbWbN9R2Ndit+i6oIc=;
 b=akWvSWG6M9iF84lpz0AZ0no2XQCeY0L7+8gC7QHNYEDNUmLsSkH/0FMaMC4jJ9Cefs
 GObDuj5aal4EkzXbeLd6vgTMXGEjW7aF47p2FcJRHqzkftqobGKHPjDCZQxOIq3BUXZp
 6RApFXqhRkQ2+t/kTu6+jSk0xBNwm+gSLnYBi1RLh7z2PWLhFyil3g4Kw1NJOJGca9et
 gqGemEqsf2TwqzMng3TywgfXzC7plPNMIAjZSxlJfA/Eai3KEW1Un6nfe3LsaceV8D3Z
 i6vTtv/a6DkmybACkpF/4u4KFZqQr/JfSP+V+rD0Bm+qRh5TOT0EoowZCyHr8n+UZdvU
 b6Ig==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 d=1e100.net; s=20130820;
 h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to
 :references;
 bh=dNw0yETYX0yXQwyzF4bAnrmNiFbWbN9R2Ndit+i6oIc=;
 b=kd5o8mMjEVgtp6KdeYalT9BknlsXLeUOoyD49ERdfUBEZGsKktzGK6XEo5hEtOCpNo
 ioqsjU8xzn/55yELgyOeiNYyVm2I2x7iwEiNHQCTLk1UkwZsWk31v2V9w8TTr0DkQ4aE
 q8zqCLtA1OKW16lshVXw/dsiGwvnhJQFlBepzISrDqhg84Uw2uMpzjePkGRXlVjGaWJX
 /oHAB4UuyePbEAY38vEsOsdZUaaG8AdmD1Om1YK6KYYvFIiaKZije9mq4Ig4U+2Gww2O
 mO2EDu9A90Q1eQYB0SNo0O5Nyfuwip1PXqmQJ3yY6Um37+bEAp69hxNl5bZ+87wrpki7
 D9DA==
X-Gm-Message-State: ALyK8tLLJB/kushZVj5VbnyhRgqXxigHcSD2H8106eBES622f99mnQgNCqrgzKZVqsihIHia
X-Received: by 10.194.178.167 with SMTP id cz7mr13454423wjc.66.1466701569714; 
 Thu, 23 Jun 2016 10:06:09 -0700 (PDT)
Received: from ping.vm.6wind.com (guy78-3-82-239-227-177.fbx.proxad.net.
 [82.239.227.177])
 by smtp.gmail.com with ESMTPSA id q63sm4770644wma.0.2016.06.23.10.06.08
 (version=TLS1_2 cipher=ECDHE-RSA-AES128-SHA bits=128/128);
 Thu, 23 Jun 2016 10:06:09 -0700 (PDT)
From: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
To: dev@dpdk.org
Cc: Ferruh Yigit <ferruh.yigit@intel.com>,
 Adrien Mazarguil <adrien.mazarguil@6wind.com>,
 Vasily Philipov <vasilyf@mellanox.com>
Date: Thu, 23 Jun 2016 19:05:22 +0200
Message-Id: <1466700801-10383-11-git-send-email-nelio.laranjeiro@6wind.com>
X-Mailer: git-send-email 2.1.4
In-Reply-To: <1466700801-10383-1-git-send-email-nelio.laranjeiro@6wind.com>
References: <1466586355-30777-1-git-send-email-nelio.laranjeiro@6wind.com>
 <1466700801-10383-1-git-send-email-nelio.laranjeiro@6wind.com>
Subject: [dpdk-dev] [PATCH v5 24/25] mlx5: make Rx queue reinitialization
	safer
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
X-List-Received-Date: Thu, 23 Jun 2016 17:06:10 -0000
Content-Type: text/plain; charset="UTF-8"
Message-ID: <20160623170522.aUY4uI8lBHnaD2fvyrjqw8Mq1wBmeYRd74CMgbpLwKM@z>

From: Adrien Mazarguil <adrien.mazarguil@6wind.com>

The primary purpose of the rxq_rehash() function is to stop and restart
reception on a queue after re-posting buffers. This may fail if the array
that temporarily stores existing buffers for reuse cannot be allocated.

Update rxq_rehash() to work on the target queue directly (not through a
template copy) and avoid this allocation.
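
For reference, a condensed view of the prototype change that enables this
(taken from the diff below); callers can now pass the queue's own elts
array instead of building a separate pointer table:

    /* Old prototype: flat pointer table allocated by the caller. */
    static int
    rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
                   struct rte_mbuf **pool);

    /* New prototype: pointer to the queue's own array of mbuf pointers. */
    static int
    rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
                   struct rte_mbuf *(*pool)[]);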

rxq_alloc_elts() is modified accordingly to take buffers from an existing
queue directly and update their refcount.
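
As an illustration only (not part of the patch), the reference count
sequence the reuse path relies on looks like this; the mbuf is already
owned by the RX ring, so the later rte_pktmbuf_free_seg() only drops the
extra reference and never touches the pool:

    #include <rte_mbuf.h>

    /* Hypothetical helper: 'buf' is already owned by the ring (refcnt == 1). */
    static void
    reuse_ring_mbuf(struct rte_mbuf *buf)
    {
            rte_pktmbuf_reset(buf);            /* reset lengths/offsets, not refcnt */
            rte_pktmbuf_refcnt_update(buf, 1); /* refcnt: 1 -> 2 */
            /* ... buffer is re-posted to the work queue here ... */
            rte_pktmbuf_free_seg(buf);         /* refcnt: 2 -> 1, no pool access */
    }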

Unlike rxq_rehash(), rxq_setup() must work on a temporary structure but
should not allocate new mbufs from the pool while reinitializing an
existing queue. This is achieved by using the refcount-aware
rxq_alloc_elts() before overwriting queue data.
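
Condensed from the diff below, the reuse branch in rxq_ctrl_setup() then
becomes:

    /* Reuse buffers from the original queue if possible. */
    if (rxq_ctrl->rxq.elts_n) {
            assert(rxq_ctrl->rxq.elts_n == desc);
            ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
    } else {
            ret = rxq_alloc_elts(&tmpl, desc, NULL);
    }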

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
---
 drivers/net/mlx5/mlx5_rxq.c | 83 ++++++++++++++++++++++-----------------------
 1 file changed, 41 insertions(+), 42 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index fbf14fa..b2ddd0d 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -642,7 +642,7 @@ priv_rehash_flows(struct priv *priv)
  */
 static int
 rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
-	       struct rte_mbuf **pool)
+	       struct rte_mbuf *(*pool)[])
 {
 	unsigned int i;
 	int ret = 0;
@@ -654,9 +654,10 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
 			&(*rxq_ctrl->rxq.wqes)[i];
 
 		if (pool != NULL) {
-			buf = *(pool++);
+			buf = (*pool)[i];
 			assert(buf != NULL);
 			rte_pktmbuf_reset(buf);
+			rte_pktmbuf_refcnt_update(buf, 1);
 		} else
 			buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
 		if (buf == NULL) {
@@ -781,7 +782,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 }
 
 /**
- * Reconfigure a RX queue with new parameters.
+ * Reconfigure RX queue buffers.
  *
  * rxq_rehash() does not allocate mbufs, which, if not done from the right
  * thread (such as a control thread), may corrupt the pool.
@@ -798,67 +799,48 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
 int
 rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
 {
-	struct rxq_ctrl tmpl = *rxq_ctrl;
-	unsigned int mbuf_n;
-	unsigned int desc_n;
-	struct rte_mbuf **pool;
-	unsigned int i, k;
+	unsigned int elts_n = rxq_ctrl->rxq.elts_n;
+	unsigned int i;
 	struct ibv_exp_wq_attr mod;
 	int err;
 
 	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq_ctrl);
-	/* Number of descriptors and mbufs currently allocated. */
-	desc_n = tmpl.rxq.elts_n;
-	mbuf_n = desc_n;
 	/* From now on, any failure will render the queue unusable.
 	 * Reinitialize WQ. */
 	mod = (struct ibv_exp_wq_attr){
 		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
 		.wq_state = IBV_EXP_WQS_RESET,
 	};
-	err = ibv_exp_modify_wq(tmpl.wq, &mod);
+	err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
 	if (err) {
 		ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
 		assert(err > 0);
 		return err;
 	}
-	/* Allocate pool. */
-	pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
-	if (pool == NULL) {
-		ERROR("%p: cannot allocate memory", (void *)dev);
-		return ENOBUFS;
-	}
 	/* Snatch mbufs from original queue. */
-	k = 0;
-	for (i = 0; (i != desc_n); ++i)
-		pool[k++] = (*rxq_ctrl->rxq.elts)[i];
-	assert(k == mbuf_n);
-	rte_free(pool);
+	claim_zero(rxq_alloc_elts(rxq_ctrl, elts_n, rxq_ctrl->rxq.elts));
+	for (i = 0; i != elts_n; ++i) {
+		struct rte_mbuf *buf = (*rxq_ctrl->rxq.elts)[i];
+
+		assert(rte_mbuf_refcnt_read(buf) == 2);
+		rte_pktmbuf_free_seg(buf);
+	}
 	/* Change queue state to ready. */
 	mod = (struct ibv_exp_wq_attr){
 		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
 		.wq_state = IBV_EXP_WQS_RDY,
 	};
-	err = ibv_exp_modify_wq(tmpl.wq, &mod);
+	err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
 	if (err) {
 		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
 		      (void *)dev, strerror(err));
 		goto error;
 	}
-	/* Post SGEs. */
-	err = rxq_alloc_elts(&tmpl, desc_n, pool);
-	if (err) {
-		ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
-		rte_free(pool);
-		assert(err > 0);
-		return err;
-	}
 	/* Update doorbell counter. */
-	rxq_ctrl->rxq.rq_ci = desc_n;
+	rxq_ctrl->rxq.rq_ci = elts_n;
 	rte_wmb();
 	*rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
 error:
-	*rxq_ctrl = tmpl;
 	assert(err >= 0);
 	return err;
 }
@@ -868,24 +850,26 @@ error:
  *
  * @param tmpl
  *   Pointer to RX queue control template.
- * @param rxq_ctrl
- *   Pointer to RX queue control.
  *
  * @return
  *   0 on success, errno value on failure.
  */
 static inline int
-rxq_setup(struct rxq_ctrl *tmpl, struct rxq_ctrl *rxq_ctrl)
+rxq_setup(struct rxq_ctrl *tmpl)
 {
 	struct ibv_cq *ibcq = tmpl->cq;
 	struct mlx5_cq *cq = to_mxxx(cq, cq);
 	struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
+	struct rte_mbuf *(*elts)[tmpl->rxq.elts_n] =
+		rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);
 
 	if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) {
 		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
 		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
 		return EINVAL;
 	}
+	if (elts == NULL)
+		return ENOMEM;
 	tmpl->rxq.rq_db = rwq->rq.db;
 	tmpl->rxq.cqe_n = ibcq->cqe + 1;
 	tmpl->rxq.cq_ci = 0;
@@ -897,9 +881,7 @@ rxq_setup(struct rxq_ctrl *tmpl, struct rxq_ctrl *rxq_ctrl)
 	tmpl->rxq.cqes =
 		(volatile struct mlx5_cqe (*)[])
 		(uintptr_t)cq->active_buf->buf;
-	tmpl->rxq.elts =
-		(struct rte_mbuf *(*)[tmpl->rxq.elts_n])
-		((uintptr_t)rxq_ctrl + sizeof(*rxq_ctrl));
+	tmpl->rxq.elts = elts;
 	return 0;
 }
 
@@ -947,6 +929,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
 	enum ibv_exp_query_intf_status status;
 	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
 	unsigned int cqe_n = desc - 1;
+	struct rte_mbuf *(*elts)[desc] = NULL;
 	int ret = 0;
 
 	(void)conf; /* Thresholds configuration (ignored). */
@@ -1104,13 +1087,19 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-	ret = rxq_setup(&tmpl, rxq_ctrl);
+	ret = rxq_setup(&tmpl);
 	if (ret) {
 		ERROR("%p: cannot initialize RX queue structure: %s",
 		      (void *)dev, strerror(ret));
 		goto error;
 	}
-	ret = rxq_alloc_elts(&tmpl, desc, NULL);
+	/* Reuse buffers from original queue if possible. */
+	if (rxq_ctrl->rxq.elts_n) {
+		assert(rxq_ctrl->rxq.elts_n == desc);
+		assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts);
+		ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
+	} else
+		ret = rxq_alloc_elts(&tmpl, desc, NULL);
 	if (ret) {
 		ERROR("%p: RXQ allocation failed: %s",
 		      (void *)dev, strerror(ret));
@@ -1119,6 +1108,14 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
 	/* Clean up rxq in case we're reinitializing it. */
 	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
 	rxq_cleanup(rxq_ctrl);
+	/* Move mbuf pointers to dedicated storage area in RX queue. */
+	elts = (void *)(rxq_ctrl + 1);
+	rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
+#ifndef NDEBUG
+	memset(tmpl.rxq.elts, 0x55, sizeof(*elts));
+#endif
+	rte_free(tmpl.rxq.elts);
+	tmpl.rxq.elts = elts;
 	*rxq_ctrl = tmpl;
 	/* Update doorbell counter. */
 	rxq_ctrl->rxq.rq_ci = desc;
@@ -1128,7 +1125,9 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
 	assert(ret == 0);
 	return 0;
 error:
+	elts = tmpl.rxq.elts;
 	rxq_cleanup(&tmpl);
+	rte_free(elts);
 	assert(ret > 0);
 	return ret;
 }
-- 
2.1.4