From: Nelio Laranjeiro
To: dev@dpdk.org
Cc: adrien.mazarguil@6wind.com
Date: Wed, 2 Aug 2017 16:10:23 +0200
Message-Id: <1aabdbe993975be19a13da75fa1522aff636deca.1501681927.git.nelio.laranjeiro@6wind.com>
X-Mailer: git-send-email 2.1.4
Subject: [dpdk-dev] [PATCH v1 07/21] net/mlx5: add reference counter on memory region

Memory regions become shared: the control plane now holds its own reference
in addition to those taken by the Tx/Rx queues. This also avoids
un-registering a memory region when a Tx queue merely evicts it from its
local cache.

Signed-off-by: Nelio Laranjeiro
---
 drivers/net/mlx5/mlx5.h      |   8 ++
 drivers/net/mlx5/mlx5_mr.c   | 202 ++++++++++++++++++++++++++++++-------------
 drivers/net/mlx5/mlx5_rxq.c  |  17 ++--
 drivers/net/mlx5/mlx5_rxtx.h |  42 +++++----
 drivers/net/mlx5/mlx5_txq.c  |   8 +-
 5 files changed, 186 insertions(+), 91 deletions(-)
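The lifetime rule the patch introduces can be summarized with the standalone
sketch below. It is a simplified model of the new priv_mr_get()/priv_mr_new()/
priv_mr_release() API (illustrative mr_get()/mr_new()/mr_release() names, plain
C11 atomics instead of rte_atomic32_t, no Verbs registration), not the driver
code itself:

#include <stdatomic.h>
#include <stdlib.h>

struct mempool;                        /* stand-in for struct rte_mempool */

struct mr {
	struct mr *next;               /* shared list rooted in the device private data */
	const struct mempool *mp;      /* mempool covered by this registration */
	atomic_uint refcnt;            /* one reference per queue currently using it */
};

/* Look the mempool up in the shared list and take a reference on a hit. */
static struct mr *
mr_get(struct mr *head, const struct mempool *mp)
{
	for (struct mr *m = head; m != NULL; m = m->next)
		if (m->mp == mp) {
			atomic_fetch_add(&m->refcnt, 1);
			return m;
		}
	return NULL;
}

/* Register a new MR; the caller owns the initial reference. */
static struct mr *
mr_new(struct mr **head, const struct mempool *mp)
{
	struct mr *m = calloc(1, sizeof(*m));

	if (m == NULL)
		return NULL;
	m->mp = mp;                    /* the driver also calls ibv_reg_mr() here */
	atomic_init(&m->refcnt, 1);
	m->next = *head;
	*head = m;
	return m;
}

/* Drop one reference; the last owner unlinks and destroys the registration. */
static int
mr_release(struct mr **head, struct mr *m)
{
	if (atomic_fetch_sub(&m->refcnt, 1) != 1)
		return -1;             /* still in use elsewhere (EBUSY in the patch) */
	for (struct mr **p = head; *p != NULL; p = &(*p)->next)
		if (*p == m) {
			*p = m->next;  /* the driver calls ibv_dereg_mr() before freeing */
			break;
		}
	free(m);
	return 0;
}

Each Rx/Tx queue ends up owning one reference per mempool it uses; the
underlying registration is only torn down once the last user releases it,
which is what the EBUSY return of priv_mr_release() reports.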
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a5e9aa1..1ae5f59 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -145,6 +145,7 @@ struct priv {
 	unsigned int reta_idx_n; /* RETA index size. */
 	struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
 	TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
+	LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
@@ -289,4 +290,11 @@ void priv_flow_stop(struct priv *);
 int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
 int priv_flow_verify(struct priv *);
 
+/* mlx5_mr.c */
+
+struct mlx5_mr* priv_mr_new(struct priv *, struct rte_mempool *);
+struct mlx5_mr* priv_mr_get(struct priv *, struct rte_mempool *);
+int priv_mr_release(struct priv *, struct mlx5_mr *);
+int priv_mr_verify(struct priv *);
+
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index e8adde5..b5e9500 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -42,6 +42,7 @@
 #endif
 
 #include
+#include
 
 #include "mlx5.h"
 #include "mlx5_rxtx.h"
@@ -111,54 +112,6 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
 }
 
 /**
- * Register mempool as a memory region.
- *
- * @param pd
- *   Pointer to protection domain.
- * @param mp
- *   Pointer to memory pool.
- *
- * @return
- *   Memory region pointer, NULL in case of error.
- */
-struct ibv_mr *
-mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
-{
-	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-	uintptr_t start;
-	uintptr_t end;
-	unsigned int i;
-
-	if (mlx5_check_mempool(mp, &start, &end) != 0) {
-		ERROR("mempool %p: not virtually contiguous",
-		      (void *)mp);
-		return NULL;
-	}
-
-	DEBUG("mempool %p area start=%p end=%p size=%zu",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	/* Round start and end to page boundary if found in memory segments. */
-	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
-		uintptr_t addr = (uintptr_t)ms[i].addr;
-		size_t len = ms[i].len;
-		unsigned int align = ms[i].hugepage_sz;
-
-		if ((start > addr) && (start < addr + len))
-			start = RTE_ALIGN_FLOOR(start, align);
-		if ((end > addr) && (end < addr + len))
-			end = RTE_ALIGN_CEIL(end, align);
-	}
-	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
-	      (void *)mp, (void *)start, (void *)end,
-	      (size_t)(end - start));
-	return ibv_reg_mr(pd,
-			  (void *)start,
-			  end - start,
-			  IBV_ACCESS_LOCAL_WRITE);
-}
-
-/**
  * Register a Memory Region (MR) <-> Memory Pool (MP) association in
  * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
  *
@@ -180,12 +133,14 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 {
 	struct mlx5_txq_ctrl *txq_ctrl =
 		container_of(txq, struct mlx5_txq_ctrl, txq);
-	struct ibv_mr *mr;
+	struct mlx5_mr *mr;
 
 	/* Add a new entry, register MR first. */
 	DEBUG("%p: discovered new memory pool \"%s\" (%p)",
 	      (void *)txq_ctrl, mp->name, (void *)mp);
-	mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp);
+	mr = priv_mr_get(txq_ctrl->priv, mp);
+	if (mr == NULL)
+		mr = priv_mr_new(txq_ctrl->priv, mp);
 	if (unlikely(mr == NULL)) {
 		DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
 		      (void *)txq_ctrl);
@@ -196,20 +151,17 @@ mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
 		DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
 		      (void *)txq_ctrl);
 		--idx;
-		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[0].mr));
+		priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[0]);
 		memmove(&txq_ctrl->txq.mp2mr[0], &txq_ctrl->txq.mp2mr[1],
 			(sizeof(txq_ctrl->txq.mp2mr) -
 			 sizeof(txq_ctrl->txq.mp2mr[0])));
 	}
 	/* Store the new entry. */
-	txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr;
-	txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length;
-	txq_ctrl->txq.mp2mr[idx].mr = mr;
-	txq_ctrl->txq.mp2mr[idx].lkey = htonl(mr->lkey);
+	txq_ctrl->txq.mp2mr[idx] = mr;
 	DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
 	      (void *)txq_ctrl, mp->name, (void *)mp,
-	      txq_ctrl->txq.mp2mr[idx].lkey);
-	return txq_ctrl->txq.mp2mr[idx].lkey;
+	      txq_ctrl->txq.mp2mr[idx]->lkey);
+	return txq_ctrl->txq.mp2mr[idx]->lkey;
 }
 
 struct txq_mp2mr_mbuf_check_data {
@@ -275,15 +227,141 @@ mlx5_txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 		return;
 	}
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
-
-		if (unlikely(mr == NULL)) {
+		if (unlikely(txq_ctrl->txq.mp2mr[i] == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (start >= (uintptr_t)mr->addr &&
-		    end <= (uintptr_t)mr->addr + mr->length)
+		if (start >= (uintptr_t)txq_ctrl->txq.mp2mr[i]->start &&
+		    end <= (uintptr_t)txq_ctrl->txq.mp2mr[i]->end)
 			return;
 	}
 	mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
 }
+
+/**
+ * Register a new memory region from the mempool and store it in the memory
+ * region list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool to register.
+ * @return
+ *   The memory region on success.
+ */
+struct mlx5_mr*
+priv_mr_new(struct priv *priv, struct rte_mempool *mp)
+{
+	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+	uintptr_t start;
+	uintptr_t end;
+	unsigned int i;
+	struct mlx5_mr *mr;
+
+	mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
+	if (!mr) {
+		DEBUG("unable to allocate memory region structure.");
+		return NULL;
+	}
+	if (mlx5_check_mempool(mp, &start, &end) != 0) {
+		ERROR("mempool %p: not virtually contiguous",
+		      (void *)mp);
+		return NULL;
+	}
+	DEBUG("mempool %p area start=%p end=%p size=%zu",
+	      (void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	/* Round start and end to page boundary if found in memory segments. */
+	for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+		uintptr_t addr = (uintptr_t)ms[i].addr;
+		size_t len = ms[i].len;
+		unsigned int align = ms[i].hugepage_sz;
+
+		if ((start > addr) && (start < addr + len))
+			start = RTE_ALIGN_FLOOR(start, align);
+		if ((end > addr) && (end < addr + len))
+			end = RTE_ALIGN_CEIL(end, align);
+	}
+	DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+	      (void *)mp, (void *)start, (void *)end,
+	      (size_t)(end - start));
+	mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
+			    IBV_ACCESS_LOCAL_WRITE);
+	mr->mp = mp;
+	mr->lkey = htonl(mr->mr->lkey);
+	mr->start = start;
+	mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
+	rte_atomic32_inc(&mr->refcnt);
+	LIST_INSERT_HEAD(&priv->mr, mr, next);
+	return mr;
+}
+
+/**
+ * Search the memory region object in the memory region list.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool.
+ * @return
+ *   The memory region on success.
+ */
+struct mlx5_mr*
+priv_mr_get(struct priv *priv, struct rte_mempool *mp)
+{
+	struct mlx5_mr *mr;
+
+	if (LIST_EMPTY(&priv->mr))
+		return NULL;
+	LIST_FOREACH(mr, &priv->mr, next) {
+		if (mr->mp == mp) {
+			rte_atomic32_inc(&mr->refcnt);
+			return mr;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Release the memory region object.
+ *
+ * @param mr
+ *   Pointer to memory region to release.
+ *
+ * @return
+ *   0 on success, errno on failure.
+ */
+int
+priv_mr_release(struct priv *priv, struct mlx5_mr *mr)
+{
+	(void)priv;
+	if (rte_atomic32_dec_and_test(&mr->refcnt)) {
+		claim_zero(ibv_dereg_mr(mr->mr));
+		LIST_REMOVE(mr, next);
+		rte_free(mr);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the memory region list is empty.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return The number of objects not released.
+ */
+int
+priv_mr_verify(struct priv *priv)
+{
+	int ret = 0;
+	struct mlx5_mr *mr;
+
+	LIST_FOREACH(mr, &priv->mr, next) {
+		DEBUG("%p: mr %p still referenced", (void *)priv,
+		      (void *)mr);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 88a024c..80cfd96 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -674,7 +674,7 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
 		*scat = (struct mlx5_wqe_data_seg){
 			.addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
 			.byte_count = htonl(DATA_LEN(buf)),
-			.lkey = htonl(rxq_ctrl->mr->lkey),
+			.lkey = rxq_ctrl->mr->lkey,
 		};
 		(*rxq_ctrl->rxq.elts)[i] = buf;
 	}
@@ -768,7 +768,7 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
 	if (rxq_ctrl->channel != NULL)
 		claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
 	if (rxq_ctrl->mr != NULL)
-		claim_zero(ibv_dereg_mr(rxq_ctrl->mr));
+		priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr);
 	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
 }
 
@@ -920,12 +920,15 @@ mlx5_rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
 		tmpl.rxq.csum_l2tun =
 			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
 	/* Use the entire RX mempool as the memory region. */
-	tmpl.mr = mlx5_mp2mr(priv->pd, mp);
+	tmpl.mr = priv_mr_get(priv, mp);
 	if (tmpl.mr == NULL) {
-		ret = EINVAL;
-		ERROR("%p: MR creation failure: %s",
-		      (void *)dev, strerror(ret));
-		goto error;
+		tmpl.mr = priv_mr_new(priv, mp);
+		if (tmpl.mr == NULL) {
+			ret = EINVAL;
+			ERROR("%p: MR creation failure: %s",
+			      (void *)dev, strerror(ret));
+			goto error;
+		}
 	}
 	if (dev->data->dev_conf.intr_conf.rxq) {
 		tmpl.channel = ibv_create_comp_channel(priv->ctx);
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 12366c5..c7c7518 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -36,6 +36,7 @@
 
 #include
 #include
+#include
 
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -51,6 +52,7 @@
 #include
 #include
 #include
+#include
 
 #include "mlx5_utils.h"
 #include "mlx5.h"
@@ -79,6 +81,17 @@ struct mlx5_txq_stats {
 
 struct priv;
 
+/* Memory region queue object. */
+struct mlx5_mr {
+	LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */
+	rte_atomic32_t refcnt; /**< Reference counter. */
+	uint32_t lkey; /**< htonl(mr->lkey). */
+	uintptr_t start; /* Start address of MR */
+	uintptr_t end; /* End address of MR */
+	struct ibv_mr *mr; /**< Memory Region. */
+	struct rte_mempool *mp; /**< Memory Pool. */
+};
+
 /* Compressed CQE context. */
 struct rxq_zip {
 	uint16_t ai; /* Array index. */
@@ -122,7 +135,7 @@ struct mlx5_rxq_ctrl {
 	struct priv *priv; /* Back pointer to private data. */
 	struct ibv_cq *cq; /* Completion Queue. */
 	struct ibv_exp_wq *wq; /* Work Queue. */
-	struct ibv_mr *mr; /* Memory Region (for mp). */
+	struct mlx5_mr *mr; /* Memory Region (for mp). */
 	struct ibv_comp_channel *channel;
 	unsigned int socket; /* CPU socket ID for allocations. */
 	struct mlx5_rxq_data rxq; /* Data path structure. */
@@ -248,6 +261,7 @@ struct mlx5_txq_data {
 	uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
 	uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
 	uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
+	uint16_t mr_cache_idx; /* Index of last hit entry. */
 	uint32_t qp_num_8s; /* QP number shifted by 8. */
 	uint32_t flags; /* Flags for Tx Queue. */
 	volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
@@ -255,13 +269,7 @@ struct mlx5_txq_data {
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register. */
-	struct {
-		uintptr_t start; /* Start address of MR */
-		uintptr_t end; /* End address of MR */
-		struct ibv_mr *mr; /* Memory Region (for mp). */
-		uint32_t lkey; /* htonl(mr->lkey) */
-	} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
-	uint16_t mr_cache_idx; /* Index of last hit entry. */
+	struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
@@ -553,20 +561,20 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
 	uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
 
 	assert(i < RTE_DIM(txq->mp2mr));
-	if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
-		return txq->mp2mr[i].lkey;
+	if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
+		return txq->mp2mr[i]->lkey;
 	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-		if (unlikely(txq->mp2mr[i].mr == NULL)) {
+		if (unlikely(txq->mp2mr[i]->mr == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (txq->mp2mr[i].start <= addr &&
-		    txq->mp2mr[i].end >= addr) {
-			assert(txq->mp2mr[i].lkey != (uint32_t)-1);
-			assert(htonl(txq->mp2mr[i].mr->lkey) ==
-			       txq->mp2mr[i].lkey);
+		if (txq->mp2mr[i]->start <= addr &&
+		    txq->mp2mr[i]->end >= addr) {
+			assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
+			assert(htonl(txq->mp2mr[i]->mr->lkey) ==
+			       txq->mp2mr[i]->lkey);
 			txq->mr_cache_idx = i;
-			return txq->mp2mr[i].lkey;
+			return txq->mp2mr[i]->lkey;
 		}
 	}
 	txq->mr_cache_idx = 0;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 3f6702a..ce826dd 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -140,11 +140,9 @@ mlx5_txq_cleanup(struct mlx5_txq_ctrl *txq_ctrl)
 		claim_zero(ibv_destroy_qp(txq_ctrl->qp));
 	if (txq_ctrl->cq != NULL)
 		claim_zero(ibv_destroy_cq(txq_ctrl->cq));
-	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (txq_ctrl->txq.mp2mr[i].mr == NULL)
-			break;
-		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr));
-	}
+	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i)
+		if (txq_ctrl->txq.mp2mr[i])
+			priv_mr_release(txq_ctrl->priv, txq_ctrl->txq.mp2mr[i]);
 
 	memset(txq_ctrl, 0, sizeof(*txq_ctrl));
 }
-- 
2.1.4
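As a closing illustration (not part of the patch), the Tx datapath lkey lookup
in mlx5_tx_mb2mr() boils down to the pattern below. txq_lkey_lookup(), struct
mr_entry and MR_CACHE_SIZE are illustrative stand-ins for the per-queue
mp2mr[] cache and MLX5_PMD_TX_MP_CACHE; the slow path in the driver is
mlx5_txq_mp2mr_reg(), which now goes through priv_mr_get()/priv_mr_new():

#include <stdint.h>
#include <stddef.h>

#define MR_CACHE_SIZE 8                 /* stands in for MLX5_PMD_TX_MP_CACHE */

struct mr_entry {
	uintptr_t start;                /* address range covered by the MR */
	uintptr_t end;
	uint32_t lkey;                  /* key programmed into the send descriptor */
};

struct txq_mr_cache {
	unsigned int last_hit;          /* mirrors txq->mr_cache_idx */
	const struct mr_entry *entries[MR_CACHE_SIZE];
};

/*
 * Resolve the lkey for a buffer address.  Returns (uint32_t)-1 when the
 * address is unknown; at that point the driver registers the mempool and
 * caches the resulting MR.
 */
static uint32_t
txq_lkey_lookup(struct txq_mr_cache *c, uintptr_t addr)
{
	const struct mr_entry *e = c->entries[c->last_hit];
	unsigned int i;

	if (e != NULL && e->start <= addr && addr <= e->end)
		return e->lkey;         /* fast path: same mempool as last packet */
	for (i = 0; i < MR_CACHE_SIZE; ++i) {
		e = c->entries[i];
		if (e == NULL)
			break;          /* first free slot: mempool not cached yet */
		if (e->start <= addr && addr <= e->end) {
			c->last_hit = i;
			return e->lkey;
		}
	}
	c->last_hit = 0;
	return (uint32_t)-1;            /* caller takes the slow registration path */
}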