From: fengchengwen
To: Kevin Laatz
Subject: Re: [dpdk-dev] [PATCH v10 05/16] dma/idxd: create dmadev instances on pci probe
Date: Wed, 20 Oct 2021 15:34:31 +0800
Message-ID: <00105951-b185-b813-d3a4-252f3e7082db@huawei.com>
In-Reply-To: <20211019141041.1890983-6-kevin.laatz@intel.com>
References: <20210827172048.558704-1-kevin.laatz@intel.com>
 <20211019141041.1890983-1-kevin.laatz@intel.com>
 <20211019141041.1890983-6-kevin.laatz@intel.com>

On 2021/10/19 22:10, Kevin Laatz wrote:
> When a suitable device is found during the PCI probe, create a dmadev
> instance for each HW queue. HW definitions required are also included.
>
> Signed-off-by: Bruce Richardson
> Signed-off-by: Kevin Laatz
> Reviewed-by: Conor Walsh
> ---

[snip]

>
> +static inline int
> +idxd_pci_dev_command(struct idxd_dmadev *idxd, enum rte_idxd_cmds command)
> +{
> +	uint8_t err_code;
> +	uint16_t qid = idxd->qid;
> +	int i = 0;
> +
> +	if (command >= idxd_disable_wq && command <= idxd_reset_wq)
> +		qid = (1 << qid);
> +	rte_spinlock_lock(&idxd->u.pci->lk);
> +	idxd->u.pci->regs->cmd = (command << IDXD_CMD_SHIFT) | qid;
> +
> +	do {
> +		rte_pause();
> +		err_code = idxd->u.pci->regs->cmdstatus;
> +		if (++i >= 1000) {
> +			IDXD_PMD_ERR("Timeout waiting for command response from HW");
> +			rte_spinlock_unlock(&idxd->u.pci->lk);
> +			return err_code;
> +		}
> +	} while (idxd->u.pci->regs->cmdstatus & CMDSTATUS_ACTIVE_MASK);

Why not "while (err_code & CMDSTATUS_ACTIVE_MASK)"? The cmdstatus register
may change between the load into err_code and the while-condition check, so
I suggest always testing err_code.
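An untested sketch of what I mean; it is the same loop as above, only the
while condition now tests the value already read into err_code:

	do {
		rte_pause();
		err_code = idxd->u.pci->regs->cmdstatus;
		if (++i >= 1000) {
			IDXD_PMD_ERR("Timeout waiting for command response from HW");
			rte_spinlock_unlock(&idxd->u.pci->lk);
			return err_code;
		}
	} while (err_code & CMDSTATUS_ACTIVE_MASK);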
> +	rte_spinlock_unlock(&idxd->u.pci->lk);
> +
> +	return err_code & CMDSTATUS_ERR_MASK;
> +}
> +
> +static uint32_t *
> +idxd_get_wq_cfg(struct idxd_pci_common *pci, uint8_t wq_idx)
> +{
> +	return RTE_PTR_ADD(pci->wq_regs_base,
> +			(uintptr_t)wq_idx << (5 + pci->wq_cfg_sz));
> +}
> +
> +static int
> +idxd_is_wq_enabled(struct idxd_dmadev *idxd)
> +{
> +	uint32_t state = idxd_get_wq_cfg(idxd->u.pci, idxd->qid)[wq_state_idx];
> +	return ((state >> WQ_STATE_SHIFT) & WQ_STATE_MASK) == 0x1;
> +}
> +
> +static int
> +idxd_pci_dev_close(struct rte_dma_dev *dev)
> +{
> +	struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
> +	uint8_t err_code;
> +
> +	/* disable the device */
> +	err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
> +	if (err_code) {
> +		IDXD_PMD_ERR("Error disabling device: code %#x", err_code);
> +		return err_code;
> +	}
> +	IDXD_PMD_DEBUG("IDXD Device disabled OK");
> +
> +	/* free device memory */
> +	IDXD_PMD_DEBUG("Freeing device driver memory");
> +	rte_free(idxd->batch_idx_ring);
> +
> +	return 0;
> +}
> +
> +static const struct rte_dma_dev_ops idxd_pci_ops = {
> +	.dev_close = idxd_pci_dev_close,
> +};
> +
> +/* each portal uses 4 x 4k pages */
> +#define IDXD_PORTAL_SIZE (4096 * 4)
> +
> +static int
> +init_pci_device(struct rte_pci_device *dev, struct idxd_dmadev *idxd,
> +		unsigned int max_queues)
> +{
> +	struct idxd_pci_common *pci;
> +	uint8_t nb_groups, nb_engines, nb_wqs;
> +	uint16_t grp_offset, wq_offset; /* how far into bar0 the regs are */
> +	uint16_t wq_size, total_wq_size;
> +	uint8_t lg2_max_batch, lg2_max_copy_size;
> +	unsigned int i, err_code;
> +
> +	pci = malloc(sizeof(*pci));
> +	if (pci == NULL) {
> +		IDXD_PMD_ERR("%s: Can't allocate memory", __func__);
> +		goto err;
> +	}
> +	rte_spinlock_init(&pci->lk);
> +
> +	/* assign the bar registers, and then configure device */
> +	pci->regs = dev->mem_resource[0].addr;
> +	grp_offset = (uint16_t)pci->regs->offsets[0];
> +	pci->grp_regs = RTE_PTR_ADD(pci->regs, grp_offset * 0x100);
> +	wq_offset = (uint16_t)(pci->regs->offsets[0] >> 16);
> +	pci->wq_regs_base = RTE_PTR_ADD(pci->regs, wq_offset * 0x100);
> +	pci->portals = dev->mem_resource[2].addr;
> +	pci->wq_cfg_sz = (pci->regs->wqcap >> 24) & 0x0F;
> +
> +	/* sanity check device status */
> +	if (pci->regs->gensts & GENSTS_DEV_STATE_MASK) {
> +		/* need function-level-reset (FLR) or is enabled */
> +		IDXD_PMD_ERR("Device status is not disabled, cannot init");
> +		goto err;
> +	}
> +	if (pci->regs->cmdstatus & CMDSTATUS_ACTIVE_MASK) {
> +		/* command in progress */
> +		IDXD_PMD_ERR("Device has a command in progress, cannot init");
> +		goto err;
> +	}
> +
> +	/* read basic info about the hardware for use when configuring */
> +	nb_groups = (uint8_t)pci->regs->grpcap;
> +	nb_engines = (uint8_t)pci->regs->engcap;
> +	nb_wqs = (uint8_t)(pci->regs->wqcap >> 16);
> +	total_wq_size = (uint16_t)pci->regs->wqcap;
> +	lg2_max_copy_size = (uint8_t)(pci->regs->gencap >> 16) & 0x1F;
> +	lg2_max_batch = (uint8_t)(pci->regs->gencap >> 21) & 0x0F;
> +
> +	IDXD_PMD_DEBUG("nb_groups = %u, nb_engines = %u, nb_wqs = %u",
> +			nb_groups, nb_engines, nb_wqs);
> +
> +	/* zero out any old config */
> +	for (i = 0; i < nb_groups; i++) {
> +		pci->grp_regs[i].grpengcfg = 0;
> +		pci->grp_regs[i].grpwqcfg[0] = 0;
> +	}
> +	for (i = 0; i < nb_wqs; i++)
> +		idxd_get_wq_cfg(pci, i)[0] = 0;
> +
> +	/* limit queues if necessary */
> +	if (max_queues != 0 && nb_wqs > max_queues) {
> +		nb_wqs = max_queues;
> +		if (nb_engines > max_queues)
> +			nb_engines = max_queues;
> +		if (nb_groups > max_queues)
> +			nb_engines = max_queues;
> +		IDXD_PMD_DEBUG("Limiting queues to %u", nb_wqs);
> +	}
> +
> +	/* put each engine into a separate group to avoid reordering */
> +	if (nb_groups > nb_engines)
> +		nb_groups = nb_engines;
> +	if (nb_groups < nb_engines)
> +		nb_engines = nb_groups;
> +
> +	/* assign engines to groups, round-robin style */
> +	for (i = 0; i < nb_engines; i++) {
> +		IDXD_PMD_DEBUG("Assigning engine %u to group %u",
> +				i, i % nb_groups);
> +		pci->grp_regs[i % nb_groups].grpengcfg |= (1ULL << i);
> +	}
> +
> +	/* now do the same for queues and give work slots to each queue */
> +	wq_size = total_wq_size / nb_wqs;
> +	IDXD_PMD_DEBUG("Work queue size = %u, max batch = 2^%u, max copy = 2^%u",
> +			wq_size, lg2_max_batch, lg2_max_copy_size);
> +	for (i = 0; i < nb_wqs; i++) {
> +		/* add engine "i" to a group */
> +		IDXD_PMD_DEBUG("Assigning work queue %u to group %u",
> +				i, i % nb_groups);
> +		pci->grp_regs[i % nb_groups].grpwqcfg[0] |= (1ULL << i);
> +		/* now configure it, in terms of size, max batch, mode */
> +		idxd_get_wq_cfg(pci, i)[wq_size_idx] = wq_size;
> +		idxd_get_wq_cfg(pci, i)[wq_mode_idx] = (1 << WQ_PRIORITY_SHIFT) |
> +				WQ_MODE_DEDICATED;
> +		idxd_get_wq_cfg(pci, i)[wq_sizes_idx] = lg2_max_copy_size |
> +				(lg2_max_batch << WQ_BATCH_SZ_SHIFT);
> +	}
> +
> +	/* dump the group configuration to output */
> +	for (i = 0; i < nb_groups; i++) {
> +		IDXD_PMD_DEBUG("## Group %d", i);
> +		IDXD_PMD_DEBUG(" GRPWQCFG: %"PRIx64, pci->grp_regs[i].grpwqcfg[0]);
> +		IDXD_PMD_DEBUG(" GRPENGCFG: %"PRIx64, pci->grp_regs[i].grpengcfg);
> +		IDXD_PMD_DEBUG(" GRPFLAGS: %"PRIx32, pci->grp_regs[i].grpflags);
> +	}
> +
> +	idxd->u.pci = pci;
> +	idxd->max_batches = wq_size;
> +
> +	/* enable the device itself */
> +	err_code = idxd_pci_dev_command(idxd, idxd_enable_dev);
> +	if (err_code) {
> +		IDXD_PMD_ERR("Error enabling device: code %#x", err_code);
> +		return err_code;

1. The err_code here comes from idxd->u.pci->regs->cmdstatus, which may be
greater than zero; I think it would be better to return an explicit negative
value on failure.
2. Suggest using "goto err" here, which also frees the pci memory.

> +	}
> +	IDXD_PMD_DEBUG("IDXD Device enabled OK");
> +
> +	return nb_wqs;
> +
> +err:
> +	free(pci);
> +	return -1;
> +}
> +
>  static int
>  idxd_dmadev_probe_pci(struct rte_pci_driver *drv, struct rte_pci_device *dev)
>  {
> -	int ret = 0;
> +	struct idxd_dmadev idxd = {0};
> +	uint8_t nb_wqs;
> +	int qid, ret = 0;
>  	char name[PCI_PRI_STR_SIZE];
> +	unsigned int max_queues = 0;
>
>  	rte_pci_device_name(&dev->addr, name, sizeof(name));
>  	IDXD_PMD_INFO("Init %s on NUMA node %d", name, dev->device.numa_node);
>  	dev->device.driver = &drv->driver;
>
> -	return ret;
> +	if (dev->device.devargs && dev->device.devargs->args[0] != '\0') {
> +		/* if the number of devargs grows beyond just 1, use rte_kvargs */
> +		if (sscanf(dev->device.devargs->args,
> +				"max_queues=%u", &max_queues) != 1) {
> +			IDXD_PMD_ERR("Invalid device parameter: '%s'",
> +					dev->device.devargs->args);
> +			return -1;
> +		}
> +	}
> +
> +	ret = init_pci_device(dev, &idxd, max_queues);
> +	if (ret < 0) {
> +		IDXD_PMD_ERR("Error initializing PCI hardware");
> +		return ret;
> +	}
> +	if (idxd.u.pci->portals == NULL) {
> +		IDXD_PMD_ERR("Error, invalid portal assigned during initialization\n");

The idxd.u.pci memory allocated in init_pci_device() needs to be freed here
as well.
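Something like the following untested snippet, where the added free() matches
the malloc() of the pci structure in init_pci_device():

	if (idxd.u.pci->portals == NULL) {
		IDXD_PMD_ERR("Error, invalid portal assigned during initialization\n");
		/* release the idxd_pci_common structure before bailing out */
		free(idxd.u.pci);
		return -EINVAL;
	}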
> +		return -EINVAL;
> +	}
> +	nb_wqs = (uint8_t)ret;
> +
> +	/* set up one device for each queue */
> +	for (qid = 0; qid < nb_wqs; qid++) {
> +		char qname[32];
> +
> +		/* add the queue number to each device name */
> +		snprintf(qname, sizeof(qname), "%s-q%d", name, qid);
> +		idxd.qid = qid;
> +		idxd.portal = RTE_PTR_ADD(idxd.u.pci->portals,
> +				qid * IDXD_PORTAL_SIZE);
> +		if (idxd_is_wq_enabled(&idxd))
> +			IDXD_PMD_ERR("Error, WQ %u seems enabled", qid);
> +		ret = idxd_dmadev_create(qname, &dev->device,
> +				&idxd, &idxd_pci_ops);
> +		if (ret != 0) {
> +			IDXD_PMD_ERR("Failed to create dmadev %s", name);
> +			if (qid == 0) /* if no devices using this, free pci */
> +				free(idxd.u.pci);
> +			return ret;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +idxd_dmadev_destroy(const char *name)
> +{
> +	int ret;
> +
> +	/* rte_dma_close is called by pmd_release */
> +	ret = rte_dma_pmd_release(name);
> +	if (ret)
> +		IDXD_PMD_DEBUG("Device cleanup failed");
> +
> +	return 0;
>  }
>
>  static int
> @@ -39,7 +292,7 @@ idxd_dmadev_remove_pci(struct rte_pci_device *dev)
>  	IDXD_PMD_INFO("Closing %s on NUMA node %d",
>  			name, dev->device.numa_node);
>
> -	return 0;
> +	return idxd_dmadev_destroy(name);

The name passed here should be the per-queue device name built with
'snprintf(qname, sizeof(qname), "%s-q%d", name, qid)', i.e. each dmadev
created in probe should be released, and auxiliary memory such as idxd.u.pci
should also be freed. See the sketch at the end of this mail.

>  }
>
>  struct rte_pci_driver idxd_pmd_drv_pci = {
>
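A rough, untested sketch of the kind of remove-path cleanup I mean; the
nb_wqs bound and the pci pointer used here are only placeholders for
illustration, the current code does not keep them around at remove time:

	/* release every per-queue dmadev created at probe time */
	for (qid = 0; qid < nb_wqs; qid++) {
		char qname[32];

		snprintf(qname, sizeof(qname), "%s-q%d", name, qid);
		idxd_dmadev_destroy(qname);
	}
	/* then free the shared structure allocated in init_pci_device() */
	free(pci);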