From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id E52AD4C9C for ; Mon, 25 Mar 2019 20:36:39 +0100 (CET) Received: from Internal Mail-Server by MTLPINE1 (envelope-from yskoh@mellanox.com) with ESMTPS (AES256-SHA encrypted); 25 Mar 2019 21:36:35 +0200 Received: from scfae-sc-2.mti.labs.mlnx (scfae-sc-2.mti.labs.mlnx [10.101.0.96]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x2PJaTFI024575; Mon, 25 Mar 2019 21:36:34 +0200 From: Yongseok Koh To: shahafs@mellanox.com Cc: dev@dpdk.org Date: Mon, 25 Mar 2019 12:36:27 -0700 Message-Id: <20190325193627.19726-4-yskoh@mellanox.com> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20190325193627.19726-1-yskoh@mellanox.com> References: <20190325193627.19726-1-yskoh@mellanox.com> Subject: [dpdk-dev] [PATCH 3/3] net/mlx4: remove device register remap X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 25 Mar 2019 19:36:40 -0000 UAR (User Access Region) registers will be stored in a process-local table and a process accesses a register in a table entry with index. Alloc/free of table entry is managed by a global bitmap. When there's a need to store a UAR register such as Tx BlueFlame register for doorbell, an index should be allocated by mlx4_uar_alloc_index() and address of the allocated table entry must be acquired by mlx4_uar_get_addr_ptr() so that the table can be expanded if overflowed. The local UAR register table doesn't cover all the indexes in the bitmap. This will be expanded if more indexes are allocated than the current size of the table. For example, the BlueFlame register for Tx doorbell has to be remapped on each secondary process. 
On initialization, primary process allocates an index for the UAR register table and stores the register address in the indexed entry of its own table when configuring a Tx queue. The index is stored in the shared memory(txq->bfreg_idx) and visible to secondary processes. As secondary processes know the index, each process stores remapped register in the same indexed entry of its local UAR register table. On the datapath of each process, the register can be referenced simply by MLX4_UAR_REG(idx) which accesses its local UAR register table by the index. Signed-off-by: Yongseok Koh --- drivers/net/mlx4/mlx4.c | 274 ++++++++++++++++++++++++++++--------------- drivers/net/mlx4/mlx4.h | 22 +++- drivers/net/mlx4/mlx4_prm.h | 2 - drivers/net/mlx4/mlx4_rxtx.c | 2 +- drivers/net/mlx4/mlx4_rxtx.h | 3 +- drivers/net/mlx4/mlx4_txq.c | 102 ++++++---------- 6 files changed, 235 insertions(+), 170 deletions(-) diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index d913c2a47e..7749e0f9e4 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -63,7 +63,7 @@ struct mlx4_shared_data *mlx4_shared_data; static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER; /* Process local data for secondary processes. */ -static struct mlx4_local_data mlx4_local_data; +struct mlx4_local_data mlx4_local_data; /** Configuration structure for device arguments. 
*/ struct mlx4_conf { @@ -267,11 +267,6 @@ mlx4_dev_start(struct rte_eth_dev *dev) return 0; DEBUG("%p: attaching configured flows to all RX queues", (void *)dev); priv->started = 1; - ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd); - if (ret) { - ERROR("%p: cannot remap UAR", (void *)dev); - goto err; - } ret = mlx4_rss_init(priv); if (ret) { ERROR("%p: cannot initialize RSS resources: %s", @@ -319,8 +314,6 @@ static void mlx4_dev_stop(struct rte_eth_dev *dev) { struct mlx4_priv *priv = dev->data->dev_private; - const size_t page_size = sysconf(_SC_PAGESIZE); - int i; if (!priv->started) return; @@ -334,15 +327,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev) mlx4_flow_sync(priv, NULL); mlx4_rxq_intr_disable(priv); mlx4_rss_deinit(priv); - for (i = 0; i != dev->data->nb_tx_queues; ++i) { - struct txq *txq; - - txq = dev->data->tx_queues[i]; - if (!txq) - continue; - munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db, - page_size), page_size); - } } /** @@ -669,128 +653,224 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd, static struct rte_pci_driver mlx4_driver; +/** + * Expand the local UAR register table. + * + * @param size + * Size of the table to be expanded + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ static int -find_lower_va_bound(const struct rte_memseg_list *msl, - const struct rte_memseg *ms, void *arg) +uar_expand_table(uint32_t size) { - void **addr = arg; + struct mlx4_local_data *ld = &mlx4_local_data; + void *mem; + size_t tbl_sz = ld->uar_table_sz; - if (msl->external) + if (size <= tbl_sz) return 0; - if (*addr == NULL) - *addr = ms->addr; - else - *addr = RTE_MIN(*addr, ms->addr); - + tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE); + mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *), + RTE_CACHE_LINE_SIZE); + if (!mem) { + rte_errno = ENOMEM; + ERROR("failed to expand uar table"); + return -rte_errno; + } + DEBUG("UAR reg. 
table is expanded to %zu", tbl_sz); + ld->uar_table = mem; + ld->uar_table_sz = tbl_sz; return 0; } /** - * Reserve UAR address space for primary process. + * Return the pointer of the indexed slot in the local UAR register table. * - * Process local resource is used by both primary and secondary to avoid - * duplicate reservation. The space has to be available on both primary and - * secondary process, TXQ UAR maps to this area using fixed mmap w/o double - * check. + * The indexed slot must be allocated by mlx4_uar_alloc_index() in advance. And + * the table will be expanded if overflowed. + * + * @param idx + * Index of the table. * * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. + * Pointer of table entry on success, NULL otherwise and rte_errno is set. */ -static int -mlx4_uar_init_primary(void) +void ** +mlx4_uar_get_addr_ptr(uint32_t idx) +{ + struct mlx4_local_data *ld = &mlx4_local_data; + int ret; + + assert(idx < MLX4_UAR_TABLE_SIZE_MAX); + if (idx >= ld->uar_table_sz) { + ret = uar_expand_table(idx + 1); + if (ret) + return NULL; + } + return &(*ld->uar_table)[idx]; +} + +/** + * Allocate a slot of UAR register table. + * + * Allocation is done by scanning the global bitmap. The global spinlock is + * acquired and released internally. + * + * @return + * Index of a free slot on success, a negative errno value otherwise and + * rte_errno is set. + */ +uint32_t +mlx4_uar_alloc_index(void) { struct mlx4_shared_data *sd = mlx4_shared_data; - void *addr = (void *)0; + uint32_t idx = 0; + uint64_t slab = 0; + int ret; - if (sd->uar_base) - return 0; - /* find out lower bound of hugepage segments */ - rte_memseg_walk(find_lower_va_bound, &addr); - /* keep distance to hugepages to minimize potential conflicts. */ - addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE)); - /* anonymous mmap, no real memory consumption. 
*/ - addr = mmap(addr, MLX4_UAR_SIZE, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - ERROR("failed to reserve UAR address space, please" - " adjust MLX4_UAR_SIZE or try --base-virtaddr"); - rte_errno = ENOMEM; + assert(rte_eal_process_type() == RTE_PROC_PRIMARY); + rte_spinlock_lock(&sd->lock); + __rte_bitmap_scan_init(sd->uar_bmp); + ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab); + if (unlikely(!ret)) { + /* + * This cannot happen unless there are unreasonably large number + * of queues and ports. + */ + rte_errno = ENOSPC; + rte_spinlock_unlock(&sd->lock); return -rte_errno; } - /* Accept either same addr or a new addr returned from mmap if target - * range occupied. - */ - INFO("reserved UAR address space: %p", addr); - sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */ - return 0; + idx += __builtin_ctzll(slab); + /* Mark the slot is occupied. */ + rte_bitmap_clear(sd->uar_bmp, idx); + rte_spinlock_unlock(&sd->lock); + DEBUG("index %d is allocated in UAR reg. table", idx); + return idx; } /** - * Unmap UAR address space reserved for primary process. + * Free a slot of UAR register table. */ -static void -mlx4_uar_uninit_primary(void) +void +mlx4_uar_free_index(uint32_t idx) { struct mlx4_shared_data *sd = mlx4_shared_data; - if (!sd->uar_base) - return; - munmap(sd->uar_base, MLX4_UAR_SIZE); - sd->uar_base = NULL; + assert(rte_eal_process_type() == RTE_PROC_PRIMARY); + assert(idx < MLX4_UAR_TABLE_SIZE_MAX); + rte_spinlock_lock(&sd->lock); + /* Mark the slot is empty. */ + rte_bitmap_set(sd->uar_bmp, idx); + rte_spinlock_unlock(&sd->lock); + DEBUG("index %d is freed in UAR reg. table", idx); } /** - * Reserve UAR address space for secondary process, align with primary process. + * Initialize UAR register table bitmap. + * + * UAR registers will be stored in a process-local table and the table is + * managed by a global bitmap. 
When there's a need to store a UAR register, an + * index should be allocated by mlx4_uar_alloc_index() and address of the + * allocated table entry must be acquired by mlx4_uar_get_addr_ptr() so that the + * table can be expanded if overflowed. + * + * The local UAR register table doesn't cover all the indexes in the bitmap. + * This will be expanded if more indexes are allocated than the current size of + * the table. + * + * Secondary process should have reference of the index and store remapped + * register at the same index in its local UAR register table. + * + * On the datapath of each process, the register can be referenced simply by + * MLX4_UAR_REG(idx). * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx4_uar_init_secondary(void) +uar_init_primary(void) { struct mlx4_shared_data *sd = mlx4_shared_data; - struct mlx4_local_data *ld = &mlx4_local_data; - void *addr; + struct rte_bitmap *bmp; + void *bmp_mem; + uint32_t bmp_size; + unsigned int i; - if (ld->uar_base) { /* Already reserved. */ - assert(sd->uar_base == ld->uar_base); - return 0; - } - assert(sd->uar_base); - /* anonymous mmap, no real memory consumption. 
*/ - addr = mmap(sd->uar_base, MLX4_UAR_SIZE, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - ERROR("UAR mmap failed: %p size: %llu", - sd->uar_base, MLX4_UAR_SIZE); - rte_errno = ENXIO; + bmp_size = rte_bitmap_get_memory_footprint(MLX4_UAR_TABLE_SIZE_MAX); + bmp_mem = rte_zmalloc("uar_table", bmp_size, RTE_CACHE_LINE_SIZE); + if (!bmp_mem) { + rte_errno = ENOMEM; + ERROR("failed to allocate memory for uar table"); return -rte_errno; } - if (sd->uar_base != addr) { - ERROR("UAR address %p size %llu occupied, please" - " adjust MLX4_UAR_OFFSET or try EAL parameter" - " --base-virtaddr", - sd->uar_base, MLX4_UAR_SIZE); - rte_errno = ENXIO; - return -rte_errno; + bmp = rte_bitmap_init(MLX4_UAR_TABLE_SIZE_MAX, bmp_mem, bmp_size); + /* Set the entire bitmap, as 1 means vacant and 0 means occupied. */ + for (i = 0; i < bmp->array2_size; ++i) + rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -1); + sd->uar_bmp = bmp; + return 0; +} + +/** + * Un-initialize UAR register resources. + * + * The global bitmap and the register table of primary process are freed. + */ +static void +uar_uninit_primary(void) +{ + struct mlx4_shared_data *sd = mlx4_shared_data; + struct mlx4_local_data *ld = &mlx4_local_data; + + if (sd->uar_bmp) { + rte_bitmap_free(sd->uar_bmp); + rte_free(sd->uar_bmp); + sd->uar_bmp = NULL; + } + /* Free primary's table. */ + if (ld->uar_table) { + rte_free(ld->uar_table); + ld->uar_table = NULL; + ld->uar_table_sz = 0; } - ld->uar_base = addr; - INFO("reserved UAR address space: %p", addr); +} + +/** + * Initialize UAR register resources for secondary process. + * + * Allocate the local UAR register table. Initially, the number of entries is + * same as the size of a bitmap slab. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +uar_init_secondary(void) +{ + /* Prepare at least a bitmap slab. 
*/ + uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE); return 0; } /** - * Unmap UAR address space reserved for secondary process. + * Un-initialize UAR register resources for secondary process. + * + * The local UAR register table is freed. */ static void -mlx4_uar_uninit_secondary(void) +uar_uninit_secondary(void) { struct mlx4_local_data *ld = &mlx4_local_data; - if (!ld->uar_base) - return; - munmap(ld->uar_base, MLX4_UAR_SIZE); - ld->uar_base = NULL; + /* Free process-local table. */ + if (ld->uar_table) { + rte_free(ld->uar_table); + ld->uar_table = NULL; + ld->uar_table_sz = 0; + } } /** @@ -824,7 +904,7 @@ mlx4_init_once(void) rte_mem_event_callback_register("MLX4_MEM_EVENT_CB", mlx4_mr_mem_event_cb, NULL); mlx4_mp_init_primary(); - ret = mlx4_uar_init_primary(); + ret = uar_init_primary(); if (ret) goto error; sd->init_done = true; @@ -833,7 +913,7 @@ mlx4_init_once(void) if (ld->init_done) break; mlx4_mp_init_secondary(); - ret = mlx4_uar_init_secondary(); + ret = uar_init_secondary(); if (ret) goto error; ++sd->secondary_cnt; @@ -847,12 +927,12 @@ mlx4_init_once(void) error: switch (rte_eal_process_type()) { case RTE_PROC_PRIMARY: - mlx4_uar_uninit_primary(); + uar_uninit_primary(); mlx4_mp_uninit_primary(); rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL); break; case RTE_PROC_SECONDARY: - mlx4_uar_uninit_secondary(); + uar_uninit_secondary(); mlx4_mp_uninit_secondary(); break; default: @@ -1011,7 +1091,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) goto error; } /* Remap UAR for Tx queues. */ - err = mlx4_tx_uar_remap(eth_dev, err); + err = mlx4_txq_uar_init_secondary(eth_dev, err); if (err) { err = rte_errno; goto error; diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 3881943ef0..977866e017 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -137,8 +137,8 @@ struct mlx4_shared_data { /* Global spinlock for primary and secondary processes. 
*/ int init_done; /* Whether primary has done initialization. */ unsigned int secondary_cnt; /* Number of secondary processes init'd. */ - void *uar_base; - /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */ + struct rte_bitmap *uar_bmp; + /* Bitmap to keep track of BlueFlame register table. */ struct mlx4_dev_list mem_event_cb_list; rte_rwlock_t mem_event_rwlock; }; @@ -146,11 +146,19 @@ struct mlx4_shared_data { /* Per-process data structure, not visible to other processes. */ struct mlx4_local_data { int init_done; /* Whether a secondary has done initialization. */ - void *uar_base; - /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */ + void *(*uar_table)[]; + /* Table of BlueFlame registers for each process. */ + size_t uar_table_sz; + /* Size of BlueFlame register table. */ }; extern struct mlx4_shared_data *mlx4_shared_data; +extern struct mlx4_local_data mlx4_local_data; + +/* The maximum size of BlueFlame register table. */ +#define MLX4_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS * RTE_MAX_QUEUES_PER_PORT) + +#define MLX4_UAR_REG(idx) ((*mlx4_local_data.uar_table)[(idx)]) /** Private data structure. */ struct mlx4_priv { @@ -197,6 +205,12 @@ struct mlx4_priv { #define PORT_ID(priv) ((priv)->dev_data->port_id) #define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)]) +/* mlx4.c */ + +void **mlx4_uar_get_addr_ptr(uint32_t idx); +uint32_t mlx4_uar_alloc_index(void); +void mlx4_uar_free_index(uint32_t idx); + /* mlx4_ethdev.c */ int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]); diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h index b3e11dde25..06ad92d391 100644 --- a/drivers/net/mlx4/mlx4_prm.h +++ b/drivers/net/mlx4/mlx4_prm.h @@ -77,8 +77,6 @@ struct mlx4_sq { uint32_t owner_opcode; /**< Default owner opcode with HW valid owner bit. */ uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */ - volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. 
*/ - volatile uint32_t *db; /**< Pointer to the doorbell remapped. */ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */ }; diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c index f22f1ba559..513c8a61bf 100644 --- a/drivers/net/mlx4/mlx4_rxtx.c +++ b/drivers/net/mlx4/mlx4_rxtx.c @@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Make sure that descriptors are written before doorbell record. */ rte_wmb(); /* Ring QP doorbell. */ - rte_write32(txq->msq.doorbell_qpn, txq->msq.db); + rte_write32(txq->msq.doorbell_qpn, MLX4_UAR_REG(txq->bfreg_idx)); txq->elts_head += i; return i; } diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h index 7d7a8988ed..d9df98715e 100644 --- a/drivers/net/mlx4/mlx4_rxtx.h +++ b/drivers/net/mlx4/mlx4_rxtx.h @@ -97,6 +97,7 @@ struct mlx4_txq_stats { struct txq { struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */ struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */ + uint32_t bfreg_idx; /**< Blueflame register index. */ unsigned int elts_head; /**< Current index in (*elts)[]. */ unsigned int elts_tail; /**< First element awaiting completion. */ int elts_comp_cd; /**< Countdown for next completion. 
*/ @@ -152,7 +153,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, /* mlx4_txq.c */ -int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd); +int mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd); uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv); int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, unsigned int socket, diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c index ed00843425..51d74d6c80 100644 --- a/drivers/net/mlx4/mlx4_txq.c +++ b/drivers/net/mlx4/mlx4_txq.c @@ -39,10 +39,15 @@ #include "mlx4_rxtx.h" #include "mlx4_utils.h" +#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET /** - * Mmap TX UAR(HW doorbell) pages into reserved UAR address space. - * Both primary and secondary process do mmap to make UAR address - * aligned. + * Initialize UAR register access for Tx. + * + * Primary process shouldn't call this function. + * + * For secondary, remap BlueFlame registers for secondary process. Remapped + * address is stored at the same indexed entry of the local UAR register table + * as primary process. * * @param[in] dev * Pointer to Ethernet device. @@ -52,83 +57,41 @@ * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ -#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET int -mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd) +mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd) { - unsigned int i, j; const unsigned int txqs_n = dev->data->nb_tx_queues; - uintptr_t pages[txqs_n]; - unsigned int pages_n = 0; - uintptr_t uar_va; - uintptr_t off; - void *addr; - void *ret; struct txq *txq; - int already_mapped; + void *addr; + void **addr_ptr; size_t page_size = sysconf(_SC_PAGESIZE); + unsigned int i; - memset(pages, 0, txqs_n * sizeof(uintptr_t)); + assert(rte_eal_process_type() == RTE_PROC_SECONDARY); /* * As rdma-core, UARs are mapped in size of OS page size. - * Use aligned address to avoid duplicate mmap. 
* Ref to libmlx4 function: mlx4_init_context() */ for (i = 0; i != txqs_n; ++i) { txq = dev->data->tx_queues[i]; if (!txq) continue; - /* UAR addr form verbs used to find dup and offset in page. */ - uar_va = (uintptr_t)txq->msq.qp_sdb; - off = uar_va & (page_size - 1); /* offset in page. */ - uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */ - already_mapped = 0; - for (j = 0; j != pages_n; ++j) { - if (pages[j] == uar_va) { - already_mapped = 1; - break; - } - } - /* new address in reserved UAR address space. */ - addr = RTE_PTR_ADD(mlx4_shared_data->uar_base, - uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1)); - if (!already_mapped) { - pages[pages_n++] = uar_va; - /* fixed mmap to specified address in reserved - * address space. - */ - ret = mmap(addr, page_size, - PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, - txq->msq.uar_mmap_offset); - if (ret != addr) { - /* fixed mmap has to return same address. */ - ERROR("port %u call to mmap failed on UAR" - " for txq %u", - dev->data->port_id, i); - rte_errno = ENXIO; - return -rte_errno; - } + addr = mmap(NULL, page_size, PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, + txq->msq.uar_mmap_offset); + if (addr == MAP_FAILED) { + ERROR("port %u mmap failed for BF reg. of txq %u", + dev->data->port_id, i); + rte_errno = ENXIO; + return -rte_errno; } - if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */ - txq->msq.db = RTE_PTR_ADD((void *)addr, off); - else - assert(txq->msq.db == - RTE_PTR_ADD((void *)addr, off)); + addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx); + if (!addr_ptr) + return -rte_errno; + *addr_ptr = addr; } return 0; } -#else -int -mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused) -{ - /* - * If rdma-core doesn't support UAR remap, secondary process is not - * supported, thus secondary cannot call this function but only primary - * makes a call. Return success to not interrupt initialization. 
- */ - assert(rte_eal_process_type() == RTE_PROC_PRIMARY); - return 0; -} #endif /** @@ -185,10 +148,8 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv) (0u << MLX4_SQ_OWNER_BIT)); #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET sq->uar_mmap_offset = dqp->uar_mmap_offset; - sq->qp_sdb = dqp->sdb; -#else - sq->db = dqp->sdb; #endif + *mlx4_uar_get_addr_ptr(txq->bfreg_idx) = dqp->sdb; sq->doorbell_qpn = dqp->doorbell_qpn; cq->buf = dcq->buf.buf; cq->cqe_cnt = dcq->cqe_cnt; @@ -255,6 +216,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, struct ibv_qp_init_attr qp_init_attr; struct txq *txq; uint8_t *bounce_buf; + void **addr_ptr; struct mlx4_malloc_vec vec[] = { { .align = RTE_CACHE_LINE_SIZE, @@ -429,6 +391,15 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, goto error; } #endif + /* Allocate a new index in UAR table. */ + ret = mlx4_uar_alloc_index(); + if (ret < 0) + goto error; + txq->bfreg_idx = ret; + /* Make sure the local UAR register table is properly expanded. */ + addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx); + if (!addr_ptr) + goto error; mlx4_txq_fill_dv_obj_info(txq, &mlxdv); /* Save first wqe pointer in the first element. 
*/ (&(*txq->elts)[0])->wqe = @@ -478,6 +449,7 @@ mlx4_tx_queue_release(void *dpdk_txq) break; } mlx4_txq_free_elts(txq); + mlx4_uar_free_index(txq->bfreg_idx); if (txq->qp) claim_zero(mlx4_glue->destroy_qp(txq->qp)); if (txq->cq) -- 2.11.0 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by dpdk.space (Postfix) with ESMTP id 8B23FA05D3 for ; Mon, 25 Mar 2019 20:37:04 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 8E4A14F9A; Mon, 25 Mar 2019 20:36:45 +0100 (CET) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id E52AD4C9C for ; Mon, 25 Mar 2019 20:36:39 +0100 (CET) Received: from Internal Mail-Server by MTLPINE1 (envelope-from yskoh@mellanox.com) with ESMTPS (AES256-SHA encrypted); 25 Mar 2019 21:36:35 +0200 Received: from scfae-sc-2.mti.labs.mlnx (scfae-sc-2.mti.labs.mlnx [10.101.0.96]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x2PJaTFI024575; Mon, 25 Mar 2019 21:36:34 +0200 From: Yongseok Koh To: shahafs@mellanox.com Cc: dev@dpdk.org Date: Mon, 25 Mar 2019 12:36:27 -0700 Message-Id: <20190325193627.19726-4-yskoh@mellanox.com> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20190325193627.19726-1-yskoh@mellanox.com> References: <20190325193627.19726-1-yskoh@mellanox.com> Subject: [dpdk-dev] [PATCH 3/3] net/mlx4: remove device register remap X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Content-Type: text/plain; charset="UTF-8" Message-ID: <20190325193627.pZcQV5VYT52ejP9LWwW7AVOpHPWhzgOnFHOhJs-c9Zc@z> UAR (User Access Region) registers will be stored in a process-local table and a process accesses a register in a table entry with index. Alloc/free of table entry is managed by a global bitmap. 
When there's a need to store a UAR register such as Tx BlueFlame register for doorbell, an index should be allocated by mlx4_uar_alloc_index() and address of the allocated table entry must be acquired by mlx4_uar_get_addr_ptr() so that the table can be expanded if overflowed. The local UAR register table doesn't cover all the indexes in the bitmap. This will be expanded if more indexes are allocated than the current size of the table. For example, the BlueFlame register for Tx doorbell has to be remapped on each secondary process. On initialization, primary process allocates an index for the UAR register table and stores the register address in the indexed entry of its own table when configuring a Tx queue. The index is stored in the shared memory(txq->bfreg_idx) and visible to secondary processes. As secondary processes know the index, each process stores remapped register in the same indexed entry of its local UAR register table. On the datapath of each process, the register can be referenced simply by MLX4_UAR_REG(idx) which accesses its local UAR register table by the index. Signed-off-by: Yongseok Koh --- drivers/net/mlx4/mlx4.c | 274 ++++++++++++++++++++++++++++--------------- drivers/net/mlx4/mlx4.h | 22 +++- drivers/net/mlx4/mlx4_prm.h | 2 - drivers/net/mlx4/mlx4_rxtx.c | 2 +- drivers/net/mlx4/mlx4_rxtx.h | 3 +- drivers/net/mlx4/mlx4_txq.c | 102 ++++++---------- 6 files changed, 235 insertions(+), 170 deletions(-) diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index d913c2a47e..7749e0f9e4 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -63,7 +63,7 @@ struct mlx4_shared_data *mlx4_shared_data; static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER; /* Process local data for secondary processes. */ -static struct mlx4_local_data mlx4_local_data; +struct mlx4_local_data mlx4_local_data; /** Configuration structure for device arguments. 
*/ struct mlx4_conf { @@ -267,11 +267,6 @@ mlx4_dev_start(struct rte_eth_dev *dev) return 0; DEBUG("%p: attaching configured flows to all RX queues", (void *)dev); priv->started = 1; - ret = mlx4_tx_uar_remap(dev, priv->ctx->cmd_fd); - if (ret) { - ERROR("%p: cannot remap UAR", (void *)dev); - goto err; - } ret = mlx4_rss_init(priv); if (ret) { ERROR("%p: cannot initialize RSS resources: %s", @@ -319,8 +314,6 @@ static void mlx4_dev_stop(struct rte_eth_dev *dev) { struct mlx4_priv *priv = dev->data->dev_private; - const size_t page_size = sysconf(_SC_PAGESIZE); - int i; if (!priv->started) return; @@ -334,15 +327,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev) mlx4_flow_sync(priv, NULL); mlx4_rxq_intr_disable(priv); mlx4_rss_deinit(priv); - for (i = 0; i != dev->data->nb_tx_queues; ++i) { - struct txq *txq; - - txq = dev->data->tx_queues[i]; - if (!txq) - continue; - munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->msq.db, - page_size), page_size); - } } /** @@ -669,128 +653,224 @@ mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd, static struct rte_pci_driver mlx4_driver; +/** + * Expand the local UAR register table. + * + * @param size + * Size of the table to be expanded + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ static int -find_lower_va_bound(const struct rte_memseg_list *msl, - const struct rte_memseg *ms, void *arg) +uar_expand_table(uint32_t size) { - void **addr = arg; + struct mlx4_local_data *ld = &mlx4_local_data; + void *mem; + size_t tbl_sz = ld->uar_table_sz; - if (msl->external) + if (size <= tbl_sz) return 0; - if (*addr == NULL) - *addr = ms->addr; - else - *addr = RTE_MIN(*addr, ms->addr); - + tbl_sz = RTE_ALIGN_CEIL(size, RTE_BITMAP_SLAB_BIT_SIZE); + mem = rte_realloc(ld->uar_table, tbl_sz * sizeof(void *), + RTE_CACHE_LINE_SIZE); + if (!mem) { + rte_errno = ENOMEM; + ERROR("failed to expand uar table"); + return -rte_errno; + } + DEBUG("UAR reg. 
table is expanded to %zu", tbl_sz); + ld->uar_table = mem; + ld->uar_table_sz = tbl_sz; return 0; } /** - * Reserve UAR address space for primary process. + * Return the pointer of the indexed slot in the local UAR register table. * - * Process local resource is used by both primary and secondary to avoid - * duplicate reservation. The space has to be available on both primary and - * secondary process, TXQ UAR maps to this area using fixed mmap w/o double - * check. + * The indexed slot must be allocated by mlx4_uar_alloc_index() in advance. And + * the table will be expanded if overflowed. + * + * @param idx + * Index of the table. * * @return - * 0 on success, a negative errno value otherwise and rte_errno is set. + * Pointer of table entry on success, NULL otherwise and rte_errno is set. */ -static int -mlx4_uar_init_primary(void) +void ** +mlx4_uar_get_addr_ptr(uint32_t idx) +{ + struct mlx4_local_data *ld = &mlx4_local_data; + int ret; + + assert(idx < MLX4_UAR_TABLE_SIZE_MAX); + if (idx >= ld->uar_table_sz) { + ret = uar_expand_table(idx + 1); + if (ret) + return NULL; + } + return &(*ld->uar_table)[idx]; +} + +/** + * Allocate a slot of UAR register table. + * + * Allocation is done by scanning the global bitmap. The global spinlock is + * acquired and released internally. + * + * @return + * Index of a free slot on success, a negative errno value otherwise and + * rte_errno is set. + */ +uint32_t +mlx4_uar_alloc_index(void) { struct mlx4_shared_data *sd = mlx4_shared_data; - void *addr = (void *)0; + uint32_t idx = 0; + uint64_t slab = 0; + int ret; - if (sd->uar_base) - return 0; - /* find out lower bound of hugepage segments */ - rte_memseg_walk(find_lower_va_bound, &addr); - /* keep distance to hugepages to minimize potential conflicts. */ - addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX4_UAR_OFFSET + MLX4_UAR_SIZE)); - /* anonymous mmap, no real memory consumption. 
*/ - addr = mmap(addr, MLX4_UAR_SIZE, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - ERROR("failed to reserve UAR address space, please" - " adjust MLX4_UAR_SIZE or try --base-virtaddr"); - rte_errno = ENOMEM; + assert(rte_eal_process_type() == RTE_PROC_PRIMARY); + rte_spinlock_lock(&sd->lock); + __rte_bitmap_scan_init(sd->uar_bmp); + ret = rte_bitmap_scan(sd->uar_bmp, &idx, &slab); + if (unlikely(!ret)) { + /* + * This cannot happen unless there are unreasonably large number + * of queues and ports. + */ + rte_errno = ENOSPC; + rte_spinlock_unlock(&sd->lock); return -rte_errno; } - /* Accept either same addr or a new addr returned from mmap if target - * range occupied. - */ - INFO("reserved UAR address space: %p", addr); - sd->uar_base = addr; /* for primary and secondary UAR re-mmap. */ - return 0; + idx += __builtin_ctzll(slab); + /* Mark the slot is occupied. */ + rte_bitmap_clear(sd->uar_bmp, idx); + rte_spinlock_unlock(&sd->lock); + DEBUG("index %d is allocated in UAR reg. table", idx); + return idx; } /** - * Unmap UAR address space reserved for primary process. + * Free a slot of UAR register table. */ -static void -mlx4_uar_uninit_primary(void) +void +mlx4_uar_free_index(uint32_t idx) { struct mlx4_shared_data *sd = mlx4_shared_data; - if (!sd->uar_base) - return; - munmap(sd->uar_base, MLX4_UAR_SIZE); - sd->uar_base = NULL; + assert(rte_eal_process_type() == RTE_PROC_PRIMARY); + assert(idx < MLX4_UAR_TABLE_SIZE_MAX); + rte_spinlock_lock(&sd->lock); + /* Mark the slot is empty. */ + rte_bitmap_set(sd->uar_bmp, idx); + rte_spinlock_unlock(&sd->lock); + DEBUG("index %d is freed in UAR reg. table", idx); } /** - * Reserve UAR address space for secondary process, align with primary process. + * Initialize UAR register table bitmap. + * + * UAR registers will be stored in a process-local table and the table is + * managed by a global bitmap. 
When there's a need to store a UAR register, an + * index should be allocated by mlx4_uar_alloc_index() and address of the + * allocated table entry must be acquired by mlx4_uar_get_addr_ptr() so that the + * table can be expanded if overflowed. + * + * The local UAR register table doesn't cover all the indexes in the bitmap. + * This will be expanded if more indexes are allocated than the current size of + * the table. + * + * Secondary process should have reference of the index and store remapped + * register at the same index in its local UAR register table. + * + * On the datapath of each process, the register can be referenced simply by + * MLX4_UAR_REG(idx). * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx4_uar_init_secondary(void) +uar_init_primary(void) { struct mlx4_shared_data *sd = mlx4_shared_data; - struct mlx4_local_data *ld = &mlx4_local_data; - void *addr; + struct rte_bitmap *bmp; + void *bmp_mem; + uint32_t bmp_size; + unsigned int i; - if (ld->uar_base) { /* Already reserved. */ - assert(sd->uar_base == ld->uar_base); - return 0; - } - assert(sd->uar_base); - /* anonymous mmap, no real memory consumption. 
*/ - addr = mmap(sd->uar_base, MLX4_UAR_SIZE, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - ERROR("UAR mmap failed: %p size: %llu", - sd->uar_base, MLX4_UAR_SIZE); - rte_errno = ENXIO; + bmp_size = rte_bitmap_get_memory_footprint(MLX4_UAR_TABLE_SIZE_MAX); + bmp_mem = rte_zmalloc("uar_table", bmp_size, RTE_CACHE_LINE_SIZE); + if (!bmp_mem) { + rte_errno = ENOMEM; + ERROR("failed to allocate memory for uar table"); return -rte_errno; } - if (sd->uar_base != addr) { - ERROR("UAR address %p size %llu occupied, please" - " adjust MLX4_UAR_OFFSET or try EAL parameter" - " --base-virtaddr", - sd->uar_base, MLX4_UAR_SIZE); - rte_errno = ENXIO; - return -rte_errno; + bmp = rte_bitmap_init(MLX4_UAR_TABLE_SIZE_MAX, bmp_mem, bmp_size); + /* Set every bit in the bitmap: 1 means vacant and 0 means occupied. */ + for (i = 0; i < bmp->array2_size; ++i) + rte_bitmap_set_slab(bmp, i * RTE_BITMAP_SLAB_BIT_SIZE, -1); + sd->uar_bmp = bmp; + return 0; +} + +/** + * Un-initialize UAR register resources. + * + * The global bitmap and the register table of primary process are freed. + */ +static void +uar_uninit_primary(void) +{ + struct mlx4_shared_data *sd = mlx4_shared_data; + struct mlx4_local_data *ld = &mlx4_local_data; + + if (sd->uar_bmp) { + rte_bitmap_free(sd->uar_bmp); + rte_free(sd->uar_bmp); + sd->uar_bmp = NULL; + } + /* Free primary's table. */ + if (ld->uar_table) { + rte_free(ld->uar_table); + ld->uar_table = NULL; + ld->uar_table_sz = 0; } - ld->uar_base = addr; - INFO("reserved UAR address space: %p", addr); +} + +/** + * Initialize UAR register resources for secondary process. + * + * Allocate the local UAR register table. Initially, the number of entries is + * same as the size of a bitmap slab. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +uar_init_secondary(void) +{ + /* Prepare at least a bitmap slab. 
*/ + uar_expand_table(RTE_BITMAP_SLAB_BIT_SIZE); return 0; } /** - * Unmap UAR address space reserved for secondary process. + * Un-initialize UAR register resources for secondary process. + * + * The local UAR register table is freed. */ static void -mlx4_uar_uninit_secondary(void) +uar_uninit_secondary(void) { struct mlx4_local_data *ld = &mlx4_local_data; - if (!ld->uar_base) - return; - munmap(ld->uar_base, MLX4_UAR_SIZE); - ld->uar_base = NULL; + /* Free process-local table. */ + if (ld->uar_table) { + rte_free(ld->uar_table); + ld->uar_table = NULL; + ld->uar_table_sz = 0; + } } /** @@ -824,7 +904,7 @@ mlx4_init_once(void) rte_mem_event_callback_register("MLX4_MEM_EVENT_CB", mlx4_mr_mem_event_cb, NULL); mlx4_mp_init_primary(); - ret = mlx4_uar_init_primary(); + ret = uar_init_primary(); if (ret) goto error; sd->init_done = true; @@ -833,7 +913,7 @@ mlx4_init_once(void) if (ld->init_done) break; mlx4_mp_init_secondary(); - ret = mlx4_uar_init_secondary(); + ret = uar_init_secondary(); if (ret) goto error; ++sd->secondary_cnt; @@ -847,12 +927,12 @@ mlx4_init_once(void) error: switch (rte_eal_process_type()) { case RTE_PROC_PRIMARY: - mlx4_uar_uninit_primary(); + uar_uninit_primary(); mlx4_mp_uninit_primary(); rte_mem_event_callback_unregister("MLX4_MEM_EVENT_CB", NULL); break; case RTE_PROC_SECONDARY: - mlx4_uar_uninit_secondary(); + uar_uninit_secondary(); mlx4_mp_uninit_secondary(); break; default: @@ -1011,7 +1091,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) goto error; } /* Remap UAR for Tx queues. */ - err = mlx4_tx_uar_remap(eth_dev, err); + err = mlx4_txq_uar_init_secondary(eth_dev, err); if (err) { err = rte_errno; goto error; diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 3881943ef0..977866e017 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -137,8 +137,8 @@ struct mlx4_shared_data { /* Global spinlock for primary and secondary processes. 
*/ int init_done; /* Whether primary has done initialization. */ unsigned int secondary_cnt; /* Number of secondary processes init'd. */ - void *uar_base; - /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */ + struct rte_bitmap *uar_bmp; + /* Bitmap to keep track of BlueFlame register table. */ struct mlx4_dev_list mem_event_cb_list; rte_rwlock_t mem_event_rwlock; }; @@ -146,11 +146,19 @@ struct mlx4_shared_data { /* Per-process data structure, not visible to other processes. */ struct mlx4_local_data { int init_done; /* Whether a secondary has done initialization. */ - void *uar_base; - /* Reserved UAR address space for TXQ UAR(hw doorbell) mapping. */ + void *(*uar_table)[]; + /* Table of BlueFlame registers for each process. */ + size_t uar_table_sz; + /* Size of BlueFlame register table. */ }; extern struct mlx4_shared_data *mlx4_shared_data; +extern struct mlx4_local_data mlx4_local_data; + +/* The maximum size of BlueFlame register table. */ +#define MLX4_UAR_TABLE_SIZE_MAX (RTE_MAX_ETHPORTS * RTE_MAX_QUEUES_PER_PORT) + +#define MLX4_UAR_REG(idx) ((*mlx4_local_data.uar_table)[(idx)]) /** Private data structure. */ struct mlx4_priv { @@ -197,6 +205,12 @@ struct mlx4_priv { #define PORT_ID(priv) ((priv)->dev_data->port_id) #define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)]) +/* mlx4.c */ + +void **mlx4_uar_get_addr_ptr(uint32_t idx); +uint32_t mlx4_uar_alloc_index(void); +void mlx4_uar_free_index(uint32_t idx); + /* mlx4_ethdev.c */ int mlx4_get_ifname(const struct mlx4_priv *priv, char (*ifname)[IF_NAMESIZE]); diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h index b3e11dde25..06ad92d391 100644 --- a/drivers/net/mlx4/mlx4_prm.h +++ b/drivers/net/mlx4/mlx4_prm.h @@ -77,8 +77,6 @@ struct mlx4_sq { uint32_t owner_opcode; /**< Default owner opcode with HW valid owner bit. */ uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */ - volatile uint32_t *qp_sdb; /**< Pointer to the doorbell. 
*/ - volatile uint32_t *db; /**< Pointer to the doorbell remapped. */ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */ }; diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c index f22f1ba559..513c8a61bf 100644 --- a/drivers/net/mlx4/mlx4_rxtx.c +++ b/drivers/net/mlx4/mlx4_rxtx.c @@ -1048,7 +1048,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Make sure that descriptors are written before doorbell record. */ rte_wmb(); /* Ring QP doorbell. */ - rte_write32(txq->msq.doorbell_qpn, txq->msq.db); + rte_write32(txq->msq.doorbell_qpn, MLX4_UAR_REG(txq->bfreg_idx)); txq->elts_head += i; return i; } diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h index 7d7a8988ed..d9df98715e 100644 --- a/drivers/net/mlx4/mlx4_rxtx.h +++ b/drivers/net/mlx4/mlx4_rxtx.h @@ -97,6 +97,7 @@ struct mlx4_txq_stats { struct txq { struct mlx4_sq msq; /**< Info for directly manipulating the SQ. */ struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */ + uint32_t bfreg_idx; /**< Blueflame register index. */ unsigned int elts_head; /**< Current index in (*elts)[]. */ unsigned int elts_tail; /**< First element awaiting completion. */ int elts_comp_cd; /**< Countdown for next completion. 
*/ @@ -152,7 +153,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, /* mlx4_txq.c */ -int mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd); +int mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd); uint64_t mlx4_get_tx_port_offloads(struct mlx4_priv *priv); int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, unsigned int socket, diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c index ed00843425..51d74d6c80 100644 --- a/drivers/net/mlx4/mlx4_txq.c +++ b/drivers/net/mlx4/mlx4_txq.c @@ -39,10 +39,15 @@ #include "mlx4_rxtx.h" #include "mlx4_utils.h" +#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET /** - * Mmap TX UAR(HW doorbell) pages into reserved UAR address space. - * Both primary and secondary process do mmap to make UAR address - * aligned. + * Initialize UAR register access for Tx. + * + * Primary process shouldn't call this function. + * + * For secondary, remap BlueFlame registers for secondary process. Remapped + * address is stored at the same indexed entry of the local UAR register table + * as primary process. * * @param[in] dev * Pointer to Ethernet device. @@ -52,83 +57,41 @@ * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ -#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET int -mlx4_tx_uar_remap(struct rte_eth_dev *dev, int fd) +mlx4_txq_uar_init_secondary(struct rte_eth_dev *dev, int fd) { - unsigned int i, j; const unsigned int txqs_n = dev->data->nb_tx_queues; - uintptr_t pages[txqs_n]; - unsigned int pages_n = 0; - uintptr_t uar_va; - uintptr_t off; - void *addr; - void *ret; struct txq *txq; - int already_mapped; + void *addr; + void **addr_ptr; size_t page_size = sysconf(_SC_PAGESIZE); + unsigned int i; - memset(pages, 0, txqs_n * sizeof(uintptr_t)); + assert(rte_eal_process_type() == RTE_PROC_SECONDARY); /* * As rdma-core, UARs are mapped in size of OS page size. - * Use aligned address to avoid duplicate mmap. 
* Ref to libmlx4 function: mlx4_init_context() */ for (i = 0; i != txqs_n; ++i) { txq = dev->data->tx_queues[i]; if (!txq) continue; - /* UAR addr form verbs used to find dup and offset in page. */ - uar_va = (uintptr_t)txq->msq.qp_sdb; - off = uar_va & (page_size - 1); /* offset in page. */ - uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */ - already_mapped = 0; - for (j = 0; j != pages_n; ++j) { - if (pages[j] == uar_va) { - already_mapped = 1; - break; - } - } - /* new address in reserved UAR address space. */ - addr = RTE_PTR_ADD(mlx4_shared_data->uar_base, - uar_va & (uintptr_t)(MLX4_UAR_SIZE - 1)); - if (!already_mapped) { - pages[pages_n++] = uar_va; - /* fixed mmap to specified address in reserved - * address space. - */ - ret = mmap(addr, page_size, - PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, - txq->msq.uar_mmap_offset); - if (ret != addr) { - /* fixed mmap has to return same address. */ - ERROR("port %u call to mmap failed on UAR" - " for txq %u", - dev->data->port_id, i); - rte_errno = ENXIO; - return -rte_errno; - } + addr = mmap(NULL, page_size, PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, + txq->msq.uar_mmap_offset); + if (addr == MAP_FAILED) { + ERROR("port %u mmap failed for BF reg. of txq %u", + dev->data->port_id, i); + rte_errno = ENXIO; + return -rte_errno; } - if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once. */ - txq->msq.db = RTE_PTR_ADD((void *)addr, off); - else - assert(txq->msq.db == - RTE_PTR_ADD((void *)addr, off)); + addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx); + if (!addr_ptr) + return -rte_errno; + *addr_ptr = addr; } return 0; } -#else -int -mlx4_tx_uar_remap(struct rte_eth_dev *dev __rte_unused, int fd __rte_unused) -{ - /* - * If rdma-core doesn't support UAR remap, secondary process is not - * supported, thus secondary cannot call this function but only primary - * makes a call. Return success to not interrupt initialization. 
- */ - assert(rte_eal_process_type() == RTE_PROC_PRIMARY); - return 0; -} #endif /** @@ -185,10 +148,8 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv) (0u << MLX4_SQ_OWNER_BIT)); #ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET sq->uar_mmap_offset = dqp->uar_mmap_offset; - sq->qp_sdb = dqp->sdb; -#else - sq->db = dqp->sdb; #endif + *mlx4_uar_get_addr_ptr(txq->bfreg_idx) = dqp->sdb; sq->doorbell_qpn = dqp->doorbell_qpn; cq->buf = dcq->buf.buf; cq->cqe_cnt = dcq->cqe_cnt; @@ -255,6 +216,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, struct ibv_qp_init_attr qp_init_attr; struct txq *txq; uint8_t *bounce_buf; + void **addr_ptr; struct mlx4_malloc_vec vec[] = { { .align = RTE_CACHE_LINE_SIZE, @@ -429,6 +391,15 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, goto error; } #endif + /* Allocate a new index in UAR table. */ + ret = mlx4_uar_alloc_index(); + if (ret < 0) + goto error; + txq->bfreg_idx = ret; + /* Make sure the local UAR register table is properly expanded. */ + addr_ptr = mlx4_uar_get_addr_ptr(txq->bfreg_idx); + if (!addr_ptr) + goto error; mlx4_txq_fill_dv_obj_info(txq, &mlxdv); /* Save first wqe pointer in the first element. */ (&(*txq->elts)[0])->wqe = @@ -478,6 +449,7 @@ mlx4_tx_queue_release(void *dpdk_txq) break; } mlx4_txq_free_elts(txq); + mlx4_uar_free_index(txq->bfreg_idx); if (txq->qp) claim_zero(mlx4_glue->destroy_qp(txq->qp)); if (txq->cq) -- 2.11.0