From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 28CCBA00BE; Tue, 17 May 2022 08:33:33 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id D640742B94; Tue, 17 May 2022 08:33:17 +0200 (CEST) Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by mails.dpdk.org (Postfix) with ESMTP id 9540D42B90; Tue, 17 May 2022 08:33:14 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1652769195; x=1684305195; h=from:to:cc:subject:date:message-id:mime-version: content-transfer-encoding; bh=lMADiutEkQlq8BlBkT4At/jaVMotknRrPmHF6NYgtyY=; b=Ng/i9V+4tzp0l1kWIZSA+cz0fefAS+0GY7KSHDzeHj4XlNPD/Q2gc7s4 X6m32phUAZTodaXCYsYUY+j/7qw7//wXbrPJl/n+/5YNVZSmfSIhr9RuM oTHOip2WalDZt6UPpEb6C4T6M+vzUzveXZnJZhvDx/qao8yL+tUJXykML DSdrYlipcU4Ax2EGvwDHVDy98yHtCTbLPaaiJsScjt+A8Yu+Q5LqnBctZ vqZ1uC8j1HPhQKUurZu83Vt/tqvGT+sGjuxpDE069PTKHyELk68lhb2oU zL0i0av4YvYY3IX69/05+7wLSmHOi1K96ehgXHeMrYSo7MeuxtP9Z8lM6 g==; X-IronPort-AV: E=McAfee;i="6400,9594,10349"; a="296354030" X-IronPort-AV: E=Sophos;i="5.91,232,1647327600"; d="scan'208";a="296354030" Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 16 May 2022 23:33:13 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.91,232,1647327600"; d="scan'208";a="522820324" Received: from unknown (HELO localhost.localdomain.sh.intel.com) ([10.238.175.107]) by orsmga003.jf.intel.com with ESMTP; 16 May 2022 23:33:10 -0700 From: Wei Huang To: dev@dpdk.org, thomas@monjalon.net, nipun.gupta@nxp.com, hemant.agrawal@nxp.com Cc: stable@dpdk.org, rosen.xu@intel.com, tianfei.zhang@intel.com, qi.z.zhang@intel.com, Wei Huang Subject: [PATCH v1] raw/afu_mf: introduce AFU MF device driver Date: Tue, 17 May 2022 02:29:31 -0400 Message-Id: 
<20220517062931.1161861-1-wei.huang@intel.com> X-Mailer: git-send-email 2.26.2 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Add afu_mf driver to manage various AFU (Acceleration Function Unit) in FPGA. Signed-off-by: Wei Huang Acked-by: Tianfei Zhang --- drivers/raw/afu_mf/afu_mf_rawdev.c | 440 ++++++++ drivers/raw/afu_mf/afu_mf_rawdev.h | 89 ++ drivers/raw/afu_mf/he_hssi.c | 369 +++++++ drivers/raw/afu_mf/he_hssi.h | 102 ++ drivers/raw/afu_mf/he_lbk.c | 429 ++++++++ drivers/raw/afu_mf/he_lbk.h | 121 +++ drivers/raw/afu_mf/he_mem.c | 181 ++++ drivers/raw/afu_mf/he_mem.h | 40 + drivers/raw/afu_mf/meson.build | 8 + drivers/raw/afu_mf/n3000_afu.c | 1997 ++++++++++++++++++++++++++++++++++++ drivers/raw/afu_mf/n3000_afu.h | 333 ++++++ drivers/raw/afu_mf/rte_pmd_afu.h | 134 +++ drivers/raw/afu_mf/version.map | 3 + drivers/raw/meson.build | 1 + 14 files changed, 4247 insertions(+) create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h create mode 100644 drivers/raw/afu_mf/he_hssi.c create mode 100644 drivers/raw/afu_mf/he_hssi.h create mode 100644 drivers/raw/afu_mf/he_lbk.c create mode 100644 drivers/raw/afu_mf/he_lbk.h create mode 100644 drivers/raw/afu_mf/he_mem.c create mode 100644 drivers/raw/afu_mf/he_mem.h create mode 100644 drivers/raw/afu_mf/meson.build create mode 100644 drivers/raw/afu_mf/n3000_afu.c create mode 100644 drivers/raw/afu_mf/n3000_afu.h create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h create mode 100644 drivers/raw/afu_mf/version.map diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c new file mode 100644 index 0000000..f24c748 --- /dev/null +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c @@ -0,0 +1,440 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * 
Copyright 2022 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "rte_pmd_afu.h" +#include "afu_mf_rawdev.h" +#include "n3000_afu.h" +#include "he_lbk.h" +#include "he_mem.h" +#include "he_hssi.h" + +#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf + +static const struct rte_afu_uuid afu_uuid_map[] = { + { N3000_AFU_UUID_L, N3000_AFU_UUID_H }, + { HE_LBK_UUID_L, HE_LBK_UUID_H }, + { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H }, + { HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H }, + { HE_HSSI_UUID_L, HE_HSSI_UUID_H }, + { 0, 0 /* sentinel */ } +}; + +static struct afu_mf_drv *afu_table[] = { + &n3000_afu_drv, + &he_lbk_drv, + &he_mem_lbk_drv, + &he_mem_tg_drv, + &he_hssi_drv, + NULL +}; + +static inline int afu_mf_trylock(struct afu_mf_rawdev *dev) +{ + int32_t x = 0; + + if (!dev || !dev->shared) + return -ENODEV; + + x = __atomic_load_n(&dev->shared->lock, __ATOMIC_RELAXED); + + if ((x != 0) || (__atomic_compare_exchange_n(&dev->shared->lock, &x, 1, + 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) == 0)) + return -EBUSY; + + return 0; +} + +static inline void afu_mf_unlock(struct afu_mf_rawdev *dev) +{ + if (!dev || !dev->shared) + return; + + __atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE); +} + +static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev, + rte_rawdev_obj_t config, size_t config_size) +{ + struct afu_mf_rawdev *dev = NULL; + int ret = 0; + + AFU_MF_PMD_FUNC_TRACE(); + + dev = afu_mf_rawdev_get_priv(rawdev); + if (!dev) + return -ENODEV; + + if (dev->ops && dev->ops->config) + ret = (*dev->ops->config)(dev, config, config_size); + + return ret; +} + +static int afu_mf_rawdev_start(struct rte_rawdev *rawdev) +{ + struct afu_mf_rawdev *dev = NULL; + int ret = 0; + + AFU_MF_PMD_FUNC_TRACE(); + + dev = afu_mf_rawdev_get_priv(rawdev); + if (!dev) + return -ENODEV; + + ret = afu_mf_trylock(dev); + if (ret) { + AFU_MF_PMD_WARN("AFU is busy, please 
start it later"); + return ret; + } + + if (dev->ops && dev->ops->start) + ret = (*dev->ops->start)(dev); + + afu_mf_unlock(dev); + + return ret; +} + +static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev) +{ + struct afu_mf_rawdev *dev = NULL; + int ret = 0; + + AFU_MF_PMD_FUNC_TRACE(); + + dev = afu_mf_rawdev_get_priv(rawdev); + if (!dev) + return; + + ret = afu_mf_trylock(dev); + if (ret) { + AFU_MF_PMD_WARN("AFU is busy, please stop it later"); + return; + } + + if (dev->ops && dev->ops->stop) + ret = (*dev->ops->stop)(dev); + + afu_mf_unlock(dev); +} + +static int afu_mf_rawdev_close(struct rte_rawdev *rawdev) +{ + struct afu_mf_rawdev *dev = NULL; + int ret = 0; + + AFU_MF_PMD_FUNC_TRACE(); + + dev = afu_mf_rawdev_get_priv(rawdev); + if (!dev) + return -ENODEV; + + if (dev->ops && dev->ops->close) + ret = (*dev->ops->close)(dev); + + return ret; +} + +static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev) +{ + struct afu_mf_rawdev *dev = NULL; + int ret = 0; + + AFU_MF_PMD_FUNC_TRACE(); + + dev = afu_mf_rawdev_get_priv(rawdev); + if (!dev) + return -ENODEV; + + ret = afu_mf_trylock(dev); + if (ret) { + AFU_MF_PMD_WARN("AFU is busy, please reset it later"); + return ret; + } + + if (dev->ops && dev->ops->reset) + ret = (*dev->ops->reset)(dev); + + afu_mf_unlock(dev); + + return ret; +} + +static int afu_mf_rawdev_selftest(uint16_t dev_id) +{ + struct afu_mf_rawdev *dev = NULL; + int ret = 0; + + AFU_MF_PMD_FUNC_TRACE(); + + if (!rte_rawdev_pmd_is_valid_dev(dev_id)) + return -ENODEV; + + dev = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]); + if (!dev) + return -ENOENT; + + ret = afu_mf_trylock(dev); + if (ret) { + AFU_MF_PMD_WARN("AFU is busy, please test it later"); + return ret; + } + + if (dev->ops && dev->ops->test) + ret = (*dev->ops->test)(dev); + + afu_mf_unlock(dev); + + return ret; +} + +static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f) +{ + struct afu_mf_rawdev *dev = NULL; + int ret = 0; + + AFU_MF_PMD_FUNC_TRACE(); + + dev = 
afu_mf_rawdev_get_priv(rawdev); + if (!dev) + return -ENODEV; + + if (dev->ops && dev->ops->dump) + ret = (*dev->ops->dump)(dev, f); + + return ret; +} + +static const struct rte_rawdev_ops afu_mf_rawdev_ops = { + .dev_info_get = NULL, + .dev_configure = afu_mf_rawdev_configure, + .dev_start = afu_mf_rawdev_start, + .dev_stop = afu_mf_rawdev_stop, + .dev_close = afu_mf_rawdev_close, + .dev_reset = afu_mf_rawdev_reset, + + .queue_def_conf = NULL, + .queue_setup = NULL, + .queue_release = NULL, + .queue_count = NULL, + + .attr_get = NULL, + .attr_set = NULL, + + .enqueue_bufs = NULL, + .dequeue_bufs = NULL, + + .dump = afu_mf_rawdev_dump, + + .xstats_get = NULL, + .xstats_get_names = NULL, + .xstats_get_by_name = NULL, + .xstats_reset = NULL, + + .firmware_status_get = NULL, + .firmware_version_get = NULL, + .firmware_load = NULL, + .firmware_unload = NULL, + + .dev_selftest = afu_mf_rawdev_selftest, +}; + +static int +afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data, + int socket_id) +{ + const struct rte_memzone *mz; + char mz_name[RTE_MEMZONE_NAMESIZE]; + struct afu_mf_shared *ptr = NULL; + int init_mz = 0; + + if (!name || !data) + return -EINVAL; + + /* name format is afu_?|??:??.? 
which is unique */ + snprintf(mz_name, sizeof(mz_name), "%s", name); + + mz = rte_memzone_lookup(mz_name); + if (!mz) { + mz = rte_memzone_reserve(mz_name, + sizeof(struct afu_mf_shared), + socket_id, 0); + init_mz = 1; + } + + if (!mz) { + AFU_MF_PMD_ERR("Allocate memory zone %s failed!", + mz_name); + return -ENOMEM; + } + + ptr = (struct afu_mf_shared *)mz->addr; + + if (init_mz) /* initialize memory zone on the first time */ + ptr->lock = 0; + + *data = ptr; + + return 0; +} + +static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev, char *name, + size_t size) +{ + int n = 0; + + if (!afu_dev || !name || !size) + return -EINVAL; + + n = snprintf(name, size, "afu_%s", afu_dev->device.name); + if (n >= (int)size) { + AFU_MF_PMD_ERR("Name of AFU device is too long!"); + return -ENAMETOOLONG; + } + + return 0; +} + +static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id) +{ + struct afu_mf_drv *entry = NULL; + int i = 0; + + if (!afu_id) + return NULL; + + while ((entry = afu_table[i++])) { + if ((entry->uuid.uuid_low == afu_id->uuid_low) && + (entry->uuid.uuid_high == afu_id->uuid_high)) + break; + } + + return entry ? 
entry->ops : NULL;
+}
+
+/*
+ * Allocate the raw device for an AFU and bind it to the matching AFU driver.
+ *
+ * The raw device is named "afu_<device name>", its private area is filled
+ * from afu_dev, the driver ops are looked up by AFU UUID, the optional init
+ * callback is run and the shared lock area is attached.
+ *
+ * Returns 0 on success or a negative errno value. On any failure after the
+ * raw device was allocated it is released again before returning.
+ */
+static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int socket_id)
+{
+	struct rte_rawdev *rawdev = NULL;
+	struct afu_mf_rawdev *dev = NULL;
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	int ret = 0;
+
+	if (!afu_dev)
+		return -EINVAL;
+
+	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (ret)
+		return ret;
+
+	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
+		name, socket_id);
+
+	/* Allocate device structure */
+	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct afu_mf_rawdev),
+		socket_id);
+	if (!rawdev) {
+		AFU_MF_PMD_ERR("Unable to allocate raw device");
+		return -ENOMEM;
+	}
+
+	rawdev->dev_ops = &afu_mf_rawdev_ops;
+	rawdev->device = &afu_dev->device;
+	rawdev->driver_name = afu_dev->driver->driver.name;
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (!dev) {
+		/* ret is still 0 here; report the failure explicitly */
+		ret = -ENODEV;
+		goto cleanup;
+	}
+
+	dev->rawdev = rawdev;
+	dev->port = afu_dev->id.port;
+	dev->addr = afu_dev->mem_resource[0].addr;
+	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
+	if (dev->ops == NULL) {
+		AFU_MF_PMD_ERR("Unsupported AFU device");
+		/* without this the probe would return 0 for a released device */
+		ret = -ENOENT;
+		goto cleanup;
+	}
+
+	if (dev->ops->init) {
+		ret = (*dev->ops->init)(dev);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed to init %s", name);
+			goto cleanup;
+		}
+	}
+
+	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
+	if (ret)
+		goto cleanup;
+
+	return ret;
+
+cleanup:
+	rte_rawdev_pmd_release(rawdev);
+	return ret;
+}
+
+static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev)
+{
+	struct rte_rawdev *rawdev = NULL;
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	int ret = 0;
+
+	if (!afu_dev)
+		return -EINVAL;
+
+	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (ret)
+		return ret;
+
+	AFU_MF_PMD_INFO("Destroy raw device %s", name);
+
+	rawdev = rte_rawdev_pmd_get_named_dev(name);
+	if (!rawdev) {
+		AFU_MF_PMD_ERR("Raw device %s not found", name);
+		return -EINVAL;
+	}
+
+	/* rte_rawdev_close is called by pmd_release */
+	ret = rte_rawdev_pmd_release(rawdev);
+	if (ret)
+		
AFU_MF_PMD_DEBUG("Device cleanup failed"); + + return 0; +} + +static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev) +{ + AFU_MF_PMD_FUNC_TRACE(); + return afu_mf_rawdev_create(afu_dev, rte_socket_id()); +} + +static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev) +{ + AFU_MF_PMD_FUNC_TRACE(); + return afu_mf_rawdev_destroy(afu_dev); +} + +static struct rte_afu_driver afu_mf_pmd_drv = { + .id_table = afu_uuid_map, + .probe = afu_mf_rawdev_probe, + .remove = afu_mf_rawdev_remove +}; + +RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME, afu_mf_pmd_drv); +RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE); diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h b/drivers/raw/afu_mf/afu_mf_rawdev.h new file mode 100644 index 0000000..5690010 --- /dev/null +++ b/drivers/raw/afu_mf/afu_mf_rawdev.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2022 Intel Corporation + */ + +#ifndef __AFU_MF_RAWDEV_H__ +#define __AFU_MF_RAWDEV_H__ + +#include +#include +#include + +#include +#include +#include + +extern int afu_mf_pmd_logtype; + +#define AFU_MF_PMD_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n", \ + __func__, ##args) + +#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG, ">>") + +#define AFU_MF_PMD_DEBUG(fmt, args...) \ + AFU_MF_PMD_LOG(DEBUG, fmt, ## args) +#define AFU_MF_PMD_INFO(fmt, args...) \ + AFU_MF_PMD_LOG(INFO, fmt, ## args) +#define AFU_MF_PMD_ERR(fmt, args...) \ + AFU_MF_PMD_LOG(ERR, fmt, ## args) +#define AFU_MF_PMD_WARN(fmt, args...) 
\ + AFU_MF_PMD_LOG(WARNING, fmt, ## args) + +#define CACHE_LINE_SIZE(n) ((n) << 6) +#define CACHE_LINE_ALIGNED(n) ((n) >> 6) +#define MHZ(f) ((f) * 1000000) + +#define dsm_poll_timeout(addr, val, cond, invl, timeout) \ +({ \ + uint64_t __wait = 0; \ + uint64_t __invl = (invl); \ + uint64_t __timeout = (timeout); \ + for (; __wait <= __timeout; __wait += __invl) { \ + (val) = *(addr); \ + if (cond) \ + break; \ + rte_delay_ms(__invl); \ + } \ + (cond) ? 0 : 1; \ +}) + +struct afu_mf_rawdev; + +struct afu_mf_ops { + int (*init)(struct afu_mf_rawdev *dev); + int (*config)(struct afu_mf_rawdev *dev, void *config, + size_t config_size); + int (*start)(struct afu_mf_rawdev *dev); + int (*stop)(struct afu_mf_rawdev *dev); + int (*test)(struct afu_mf_rawdev *dev); + int (*close)(struct afu_mf_rawdev *dev); + int (*reset)(struct afu_mf_rawdev *dev); + int (*dump)(struct afu_mf_rawdev *dev, FILE *f); +}; + +struct afu_mf_drv { + struct rte_afu_uuid uuid; + struct afu_mf_ops *ops; +}; + +struct afu_mf_shared { + int32_t lock; +}; + +struct afu_mf_rawdev { + struct rte_rawdev *rawdev; /* point to parent raw device */ + struct afu_mf_shared *shared; /* shared data for multi-process */ + struct afu_mf_ops *ops; /* device operation functions */ + int port; /* index of port the AFU attached */ + void *addr; /* base address of AFU registers */ + void *priv; /* private driver data */ +}; + +static inline struct afu_mf_rawdev * +afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev) +{ + return rawdev ? 
(struct afu_mf_rawdev *)rawdev->dev_private : NULL; +} + +#endif /* __AFU_MF_RAWDEV_H__ */ diff --git a/drivers/raw/afu_mf/he_hssi.c b/drivers/raw/afu_mf/he_hssi.c new file mode 100644 index 0000000..68d8dba --- /dev/null +++ b/drivers/raw/afu_mf/he_hssi.c @@ -0,0 +1,369 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "afu_mf_rawdev.h" +#include "he_hssi.h" + +static int he_hssi_indirect_write(struct he_hssi_ctx *ctx, uint32_t addr, + uint32_t value) +{ + struct traffic_ctrl_cmd cmd; + struct traffic_ctrl_data data; + uint32_t i = 0; + + AFU_MF_PMD_DEBUG("Indirect write 0x%x, value 0x%08x", addr, value); + + if (!ctx) + return -EINVAL; + + data.write_data = value; + rte_write64(data.csr, ctx->addr + TRAFFIC_CTRL_DATA); + + cmd.csr = 0; + cmd.write_cmd = 1; + cmd.afu_cmd_addr = addr; + rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD); + + while (i < MAILBOX_TIMEOUT_MS) { + rte_delay_ms(MAILBOX_POLL_INTERVAL_MS); + cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD); + if (cmd.ack_trans) + break; + i += MAILBOX_POLL_INTERVAL_MS; + } + if (i >= MAILBOX_TIMEOUT_MS) + return -ETIME; + + i = 0; + cmd.csr = 0; + while (i < MAILBOX_TIMEOUT_MS) { + cmd.ack_trans = 1; + rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD); + rte_delay_ms(MAILBOX_POLL_INTERVAL_MS); + cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD); + if (!cmd.ack_trans) + break; + i += MAILBOX_POLL_INTERVAL_MS; + } + if (i >= MAILBOX_TIMEOUT_MS) + return -ETIME; + + return 0; +} + +static int he_hssi_indirect_read(struct he_hssi_ctx *ctx, uint32_t addr, + uint32_t *value) +{ + struct traffic_ctrl_cmd cmd; + struct traffic_ctrl_data data; + uint32_t i = 0; + + if (!ctx) + return -EINVAL; + + cmd.csr = 0; + cmd.read_cmd = 1; + cmd.afu_cmd_addr = addr; + rte_write64(cmd.csr, 
ctx->addr + TRAFFIC_CTRL_CMD); + + while (i < MAILBOX_TIMEOUT_MS) { + rte_delay_ms(MAILBOX_POLL_INTERVAL_MS); + cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD); + if (cmd.ack_trans) { + data.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_DATA); + *value = data.read_data; + break; + } + i += MAILBOX_POLL_INTERVAL_MS; + } + if (i >= MAILBOX_TIMEOUT_MS) + return -ETIME; + + i = 0; + cmd.csr = 0; + while (i < MAILBOX_TIMEOUT_MS) { + cmd.ack_trans = 1; + rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD); + rte_delay_ms(MAILBOX_POLL_INTERVAL_MS); + cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD); + if (!cmd.ack_trans) + break; + i += MAILBOX_POLL_INTERVAL_MS; + } + if (i >= MAILBOX_TIMEOUT_MS) + return -ETIME; + + AFU_MF_PMD_DEBUG("Indirect read 0x%x, value 0x%08x", addr, *value); + return 0; +} + +static void he_hssi_report(struct he_hssi_ctx *ctx) +{ + uint32_t val = 0; + uint64_t v64 = 0; + int ret = 0; + + ret = he_hssi_indirect_read(ctx, TM_PKT_GOOD, &val); + if (ret) + return; + printf("Number of good packets received: %u\n", val); + + ret = he_hssi_indirect_read(ctx, TM_PKT_BAD, &val); + if (ret) + return; + printf("Number of bad packets received: %u\n", val); + + ret = he_hssi_indirect_read(ctx, TM_BYTE_CNT1, &val); + if (ret) + return; + v64 = val; + ret = he_hssi_indirect_read(ctx, TM_BYTE_CNT0, &val); + if (ret) + return; + v64 = (v64 << 32) | val; + printf("Number of bytes received: %"PRIu64"\n", v64); + + ret = he_hssi_indirect_read(ctx, TM_AVST_RX_ERR, &val); + if (ret) + return; + if (val & ERR_VALID) { + printf("AVST rx error:"); + if (val & OVERFLOW_ERR) + printf(" overflow"); + if (val & LENGTH_ERR) + printf(" length"); + if (val & OVERSIZE_ERR) + printf(" oversize"); + if (val & UNDERSIZE_ERR) + printf(" undersize"); + if (val & MAC_CRC_ERR) + printf(" crc"); + if (val & PHY_ERR) + printf(" phy"); + printf("\n"); + } + + ret = he_hssi_indirect_read(ctx, LOOPBACK_FIFO_STATUS, &val); + if (ret) + return; + if (val & (ALMOST_EMPTY | ALMOST_FULL)) { + 
printf("FIFO status:");
+		if (val & ALMOST_EMPTY)
+			printf(" almost empty");
+		if (val & ALMOST_FULL)
+			printf(" almost full");
+		printf("\n");
+	}
+}
+
+/*
+ * Run the HE-HSSI test with the configuration stored by he_hssi_config().
+ *
+ * Stops any running transfer and selects the configured channel. If
+ * he_loopback is non-negative only the loopback enable is written and the
+ * function returns. Otherwise the traffic generator is programmed (packet
+ * count and length, MAC addresses, length/payload type, random seeds),
+ * started, and TG_PKT_XFRD is polled once per second, at most cfg->timeout
+ * times, until it equals num_packets. The counters are then printed.
+ *
+ * Returns 0 on success or the negative errno value of the failing
+ * register access. -EINVAL/-ENOENT are returned for a NULL device or
+ * missing private data.
+ */
+static int he_hssi_test(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
+	struct he_hssi_ctx *ctx = NULL;
+	struct traffic_ctrl_ch_sel sel;
+	uint32_t val = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_hssi_cfg;
+	ctx = &priv->he_hssi_ctx;
+
+	ret = he_hssi_indirect_write(ctx, TG_STOP_XFR, 0);
+	if (ret)
+		return ret;
+
+	/* zero the reserved bits; only channel_sel is meant to be written */
+	sel.csr = 0;
+	sel.channel_sel = cfg->port;
+	rte_write64(sel.csr, ctx->addr + TRAFFIC_CTRL_CH_SEL);
+
+	if (cfg->he_loopback >= 0) {
+		val = cfg->he_loopback ? 1 : 0;
+		AFU_MF_PMD_INFO("%s HE loopback on port %u",
+			val ? "Enable" : "Disable", cfg->port);
+		return he_hssi_indirect_write(ctx, LOOPBACK_EN, val);
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_NUM_PKT, cfg->num_packets);
+	if (ret)
+		return ret;
+
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN, cfg->packet_length);
+	if (ret)
+		return ret;
+
+	val = cfg->src_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->src_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->dest_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->dest_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_length ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN_TYPE, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_payload ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_DATA_PATTERN, val);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < 3; i++) {
+		ret = he_hssi_indirect_write(ctx, TG_RANDOM_SEED(i),
+			cfg->rnd_seed[i]);
+		if (ret)
+			return ret;
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_START_XFR, 1);
+	if (ret)
+		return ret;
+
+	/* restart the counter: the seed loop above leaves i at 3 */
+	i = 0;
+	while (i++ < cfg->timeout) {
+		ret = he_hssi_indirect_read(ctx, TG_PKT_XFRD, &val);
+		if (ret)
+			break;
+		if (val == cfg->num_packets)
+			break;
+		sleep(1);
+	}
+
+	he_hssi_report(ctx);
+
+	return ret;
+}
+
+static int he_hssi_init(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct he_hssi_ctx *ctx = NULL;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv) {
+		priv = rte_zmalloc(NULL, sizeof(struct he_hssi_priv), 0);
+		if (!priv)
+			return -ENOMEM;
+		dev->priv = priv;
+	}
+
+	ctx = &priv->he_hssi_ctx;
+	ctx->addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+static int he_hssi_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
+
+	if (!dev || !config || !config_size)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_hssi_cfg))
+		return -EINVAL;
+
+	cfg = (struct rte_pmd_afu_he_hssi_cfg *)config;
+	if (cfg->port >= NUM_HE_HSSI_PORTS)
+		return -EINVAL;
+
+	rte_memcpy(&priv->he_hssi_cfg, cfg, sizeof(priv->he_hssi_cfg));
+
+	return 0;
+}
+
+static int he_hssi_close(struct afu_mf_rawdev *dev)
+{
+	if (!dev)
+		return -EINVAL;
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+
+	return 0;
+}
+
+static int he_hssi_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct he_hssi_ctx *ctx = NULL;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (!f)
+		f = stdout;
+
+	ctx = &priv->he_hssi_ctx;
+
+	fprintf(f, "addr:\t\t%p\n", 
(void *)ctx->addr); + + return 0; +} + +static struct afu_mf_ops he_hssi_ops = { + .init = he_hssi_init, + .config = he_hssi_config, + .start = NULL, + .stop = NULL, + .test = he_hssi_test, + .close = he_hssi_close, + .dump = he_hssi_dump, + .reset = NULL +}; + +struct afu_mf_drv he_hssi_drv = { + .uuid = { HE_HSSI_UUID_L, HE_HSSI_UUID_H }, + .ops = &he_hssi_ops +}; diff --git a/drivers/raw/afu_mf/he_hssi.h b/drivers/raw/afu_mf/he_hssi.h new file mode 100644 index 0000000..f8b9623 --- /dev/null +++ b/drivers/raw/afu_mf/he_hssi.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 Intel Corporation + */ + +#ifndef _HE_HSSI_H_ +#define _HE_HSSI_H_ + +#include "afu_mf_rawdev.h" +#include "rte_pmd_afu.h" + +#define HE_HSSI_UUID_L 0xbb370242ac130002 +#define HE_HSSI_UUID_H 0x823c334c98bf11ea +#define NUM_HE_HSSI_PORTS 8 + +extern struct afu_mf_drv he_hssi_drv; + +/* HE-HSSI registers definition */ +#define TRAFFIC_CTRL_CMD 0x30 +#define TRAFFIC_CTRL_DATA 0x38 +#define TRAFFIC_CTRL_CH_SEL 0x40 +#define AFU_SCRATCHPAD 0x48 + +#define TG_NUM_PKT 0x3c00 +#define TG_PKT_LEN_TYPE 0x3c01 +#define TG_DATA_PATTERN 0x3c02 +#define TG_START_XFR 0x3c03 +#define TG_STOP_XFR 0x3c04 +#define TG_SRC_MAC_L 0x3c05 +#define TG_SRC_MAC_H 0x3c06 +#define TG_DST_MAC_L 0x3c07 +#define TG_DST_MAC_H 0x3c08 +#define TG_PKT_XFRD 0x3c09 +#define TG_RANDOM_SEED(n) (0x3c0a + (n)) +#define TG_PKT_LEN 0x3c0d + +#define TM_NUM_PKT 0x3d00 +#define TM_PKT_GOOD 0x3d01 +#define TM_PKT_BAD 0x3d02 +#define TM_BYTE_CNT0 0x3d03 +#define TM_BYTE_CNT1 0x3d04 +#define TM_AVST_RX_ERR 0x3d07 +#define OVERFLOW_ERR (1 << 9) +#define LENGTH_ERR (1 << 8) +#define OVERSIZE_ERR (1 << 7) +#define UNDERSIZE_ERR (1 << 6) +#define MAC_CRC_ERR (1 << 5) +#define PHY_ERR (1 << 4) +#define ERR_VALID (1 << 3) + +#define LOOPBACK_EN 0x3e00 +#define LOOPBACK_FIFO_STATUS 0x3e01 +#define ALMOST_EMPTY (1 << 1) +#define ALMOST_FULL (1 << 0) + +#define MAILBOX_TIMEOUT_MS 100 +#define 
MAILBOX_POLL_INTERVAL_MS 10 + +struct traffic_ctrl_cmd { + union { + uint64_t csr; + struct { + uint32_t read_cmd:1; + uint32_t write_cmd:1; + uint32_t ack_trans:1; + uint32_t rsvd1:29; + uint32_t afu_cmd_addr:16; + uint32_t rsvd2:16; + }; + }; +}; + +struct traffic_ctrl_data { + union { + uint64_t csr; + struct { + uint32_t read_data; + uint32_t write_data; + }; + }; +}; + +struct traffic_ctrl_ch_sel { + union { + uint64_t csr; + struct { + uint32_t channel_sel:3; + uint32_t rsvd1:29; + uint32_t rsvd2; + }; + }; +}; + +struct he_hssi_ctx { + uint8_t *addr; +}; + +struct he_hssi_priv { + struct rte_pmd_afu_he_hssi_cfg he_hssi_cfg; + struct he_hssi_ctx he_hssi_ctx; +}; + +#endif /* _HE_HSSI_H_ */ diff --git a/drivers/raw/afu_mf/he_lbk.c b/drivers/raw/afu_mf/he_lbk.c new file mode 100644 index 0000000..d47ddde --- /dev/null +++ b/drivers/raw/afu_mf/he_lbk.c @@ -0,0 +1,429 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "afu_mf_rawdev.h" +#include "he_lbk.h" + +static int he_lbk_afu_config(struct afu_mf_rawdev *dev) +{ + struct he_lbk_priv *priv = NULL; + struct rte_pmd_afu_he_lbk_cfg *cfg = NULL; + struct he_lbk_csr_cfg v; + + if (!dev) + return -EINVAL; + + priv = (struct he_lbk_priv *)dev->priv; + if (!priv) + return -ENOENT; + + cfg = &priv->he_lbk_cfg; + + v.csr = 0; + + if (cfg->cont) + v.cont = 1; + + v.mode = cfg->mode; + v.trput_interleave = cfg->trput_interleave; + if (cfg->multi_cl == 4) + v.multicl_len = 2; + else + v.multicl_len = cfg->multi_cl - 1; + + AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr); + rte_write32(v.csr, priv->he_lbk_ctx.addr + CSR_CFG); + + return 0; +} + +static void he_lbk_report(struct afu_mf_rawdev *dev, uint32_t cl) +{ + struct he_lbk_priv *priv = NULL; + struct rte_pmd_afu_he_lbk_cfg *cfg = NULL; + struct 
he_lbk_ctx *ctx = NULL;
+	struct he_lbk_dsm_status *stat = NULL;
+	struct he_lbk_status0 stat0;
+	struct he_lbk_status1 stat1;
+	uint64_t swtest_msg = 0;
+	uint64_t ticks = 0;
+	uint64_t info = 0;
+	double num, rd_bw, wr_bw;
+
+	if (!dev || !dev->priv)
+		return;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+
+	stat = ctx->status_ptr;
+
+	swtest_msg = rte_read64(ctx->addr + CSR_SWTEST_MSG);
+	stat0.csr = rte_read64(ctx->addr + CSR_STATUS0);
+	stat1.csr = rte_read64(ctx->addr + CSR_STATUS1);
+
+	if (cfg->cont)
+		ticks = stat->num_clocks - stat->start_overhead;
+	else
+		ticks = stat->num_clocks -
+			(stat->start_overhead + stat->end_overhead);
+
+	if (cfg->freq_mhz == 0) {
+		info = rte_read64(ctx->addr + CSR_HE_INFO0);
+		AFU_MF_PMD_INFO("API version: %"PRIx64, info >> 16);
+		cfg->freq_mhz = info & 0xffff;
+		if (cfg->freq_mhz == 0) {
+			AFU_MF_PMD_INFO("Frequency of AFU clock is unknown."
+				" Assuming 350 MHz.");
+			cfg->freq_mhz = 350;
+		}
+	}
+
+	num = (double)stat0.num_reads;
+	rd_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+	num = (double)stat0.num_writes;
+	wr_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+
+	printf("Cachelines Read_Count Write_Count Pend_Read Pend_Write "
+		"Clocks@%uMHz Rd_Bandwidth Wr_Bandwidth\n",
+		cfg->freq_mhz);
+	/* ticks is uint64_t: use PRIu64 rather than %lu */
+	printf("%10u %10u %10u %10u %10u %12"PRIu64" %7.3f GB/s %7.3f GB/s\n",
+		cl, stat0.num_reads, stat0.num_writes,
+		stat1.num_pend_reads, stat1.num_pend_writes,
+		ticks, rd_bw / 1e9, wr_bw / 1e9);
+	printf("Test Message: 0x%"PRIx64"\n", swtest_msg);
+}
+
+/*
+ * Run the loopback test with the configuration stored by he_lbk_config().
+ *
+ * Programs the source, destination and DSM addresses, applies the AFU
+ * configuration, fills the source buffer with an incrementing 32-bit
+ * pattern and then, for every cache line count from cfg->begin to cfg->end
+ * in steps of cfg->multi_cl, starts the AFU, waits for test_complete in
+ * the DSM area, prints the report and, in NLB_MODE_LPBK, compares the
+ * destination buffer against the pattern.
+ *
+ * Returns 0 on success, -EINVAL/-ENOENT for a NULL device or missing
+ * private data, the error of he_lbk_afu_config(), or -ETIME when the DSM
+ * completion flag is not set within DSM_TIMEOUT.
+ */
+static int he_lbk_test(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	struct he_lbk_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	rte_delay_us(1000);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	/* initialize DMA addresses */
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->src_iova),
+		ctx->addr + CSR_SRC_ADDR);
+
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->dest_iova),
+		ctx->addr + CSR_DST_ADDR);
+
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	rte_write32(CACHE_LINE_ALIGNED(ctx->dsm_iova),
+		ctx->addr + CSR_AFU_DSM_BASEL);
+	rte_write32(CACHE_LINE_ALIGNED(ctx->dsm_iova) >> 32,
+		ctx->addr + CSR_AFU_DSM_BASEH);
+
+	ret = he_lbk_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CACHE_LINE_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		memset(ctx->dest_ptr, 0, CACHE_LINE_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		rte_delay_us(1000);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		rte_write32(cl - 1, ctx->addr + CSR_NUM_LINES);
+
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				/* dsm_poll_timeout() yields 1; map to errno */
+				ret = -ETIME;
+				goto end;
+			}
+		} else {
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				/* dsm_poll_timeout() yields 1; map to errno */
+				ret = -ETIME;
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		he_lbk_report(dev, cl);
+
+		/* wait up to ~100 ms for CSR_STATUS1 to read back as zero */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		/* NOTE(review): a data mismatch is only logged, not returned */
+		if (cfg->mode == NLB_MODE_LPBK) {
+			ptr = (uint32_t *)ctx->dest_ptr;
+			j = CACHE_LINE_SIZE(cl) >> 2;
+			for (i = 0; i < j; i++) {
+				if (*ptr++ != i) {
+					AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+					break;
+				}
+			}
+		}
+	}
+
+end:
+	/* propagate a DSM timeout instead of always reporting success */
+	return ret;
+}
+
+static int he_lbk_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	ctx = &priv->he_lbk_ctx;
+
+	rte_free(ctx->dsm_ptr);
+	ctx->dsm_ptr = NULL;
+	ctx->status_ptr = NULL;
+
+	rte_free(ctx->src_ptr);
+	ctx->src_ptr = NULL;
+
+	rte_free(ctx->dest_ptr);
+	ctx->dest_ptr = NULL;
+
+	return 0;
+}
+
+static int he_lbk_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	ctx = &priv->he_lbk_ctx;
+	ctx->addr = (uint8_t *)dev->addr;
+
+	ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+	if (!ctx->dsm_ptr) {
+		ret = -ENOMEM;
+		goto release;
+	}
+	ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr);
+	if (ctx->dsm_iova == RTE_BAD_IOVA) {
+		ret = -ENOMEM;
+		goto release;
+	}
+
+	ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (!ctx->src_ptr) {
+		ret = -ENOMEM;
+		goto release;
+	}
+	ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr);
+	if (ctx->src_iova == RTE_BAD_IOVA) {
+		ret = -ENOMEM;
+		goto release;
+	}
+
+	ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (!ctx->dest_ptr) {
+		ret = -ENOMEM;
+		goto release;
+	}
+	ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr);
+	if (ctx->dest_iova == RTE_BAD_IOVA) {
+		ret = -ENOMEM;
+		goto release;
+	}
+
+	ctx->status_ptr = (struct he_lbk_dsm_status 
*)ctx->dsm_ptr; + return 0; + +release: + he_lbk_ctx_release(dev); + return ret; +} + +static int he_lbk_init(struct afu_mf_rawdev *dev) +{ + if (!dev) + return -EINVAL; + + if (!dev->priv) { + dev->priv = rte_zmalloc(NULL, sizeof(struct he_lbk_priv), 0); + if (!dev->priv) + return -ENOMEM; + } + + return he_lbk_ctx_init(dev); +} + +static int he_lbk_config(struct afu_mf_rawdev *dev, void *config, + size_t config_size) +{ + struct he_lbk_priv *priv = NULL; + struct rte_pmd_afu_he_lbk_cfg *cfg = NULL; + + if (!dev || !config || !config_size) + return -EINVAL; + + priv = (struct he_lbk_priv *)dev->priv; + if (!priv) + return -ENOENT; + + if (config_size != sizeof(struct rte_pmd_afu_he_lbk_cfg)) + return -EINVAL; + + cfg = (struct rte_pmd_afu_he_lbk_cfg *)config; + if (cfg->mode > NLB_MODE_TRPUT) + return -EINVAL; + if ((cfg->multi_cl != 1) && (cfg->multi_cl != 2) && + (cfg->multi_cl != 4)) + return -EINVAL; + if ((cfg->begin < MIN_CACHE_LINES) || (cfg->begin > MAX_CACHE_LINES)) + return -EINVAL; + if ((cfg->end < cfg->begin) || (cfg->end > MAX_CACHE_LINES)) + return -EINVAL; + + rte_memcpy(&priv->he_lbk_cfg, cfg, sizeof(priv->he_lbk_cfg)); + + return 0; +} + +static int he_lbk_close(struct afu_mf_rawdev *dev) +{ + if (!dev) + return -EINVAL; + + he_lbk_ctx_release(dev); + + rte_free(dev->priv); + dev->priv = NULL; + + return 0; +} + +static int he_lbk_dump(struct afu_mf_rawdev *dev, FILE *f) +{ + struct he_lbk_priv *priv = NULL; + struct he_lbk_ctx *ctx = NULL; + + if (!dev) + return -EINVAL; + + priv = (struct he_lbk_priv *)dev->priv; + if (!priv) + return -ENOENT; + + if (!f) + f = stdout; + + ctx = &priv->he_lbk_ctx; + + fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr); + fprintf(f, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr); + fprintf(f, "dsm_iova:\t%p\n", (void *)ctx->dsm_iova); + fprintf(f, "src_ptr:\t%p\n", (void *)ctx->src_ptr); + fprintf(f, "src_iova:\t%p\n", (void *)ctx->src_iova); + fprintf(f, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr); + fprintf(f, 
"dest_iova:\t%p\n", (void *)ctx->dest_iova); + fprintf(f, "status_ptr:\t%p\n", (void *)ctx->status_ptr); + + return 0; +} + +static struct afu_mf_ops he_lbk_ops = { + .init = he_lbk_init, + .config = he_lbk_config, + .start = NULL, + .stop = NULL, + .test = he_lbk_test, + .close = he_lbk_close, + .dump = he_lbk_dump, + .reset = NULL +}; + +struct afu_mf_drv he_lbk_drv = { + .uuid = { HE_LBK_UUID_L, HE_LBK_UUID_H }, + .ops = &he_lbk_ops +}; + +struct afu_mf_drv he_mem_lbk_drv = { + .uuid = { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H }, + .ops = &he_lbk_ops +}; diff --git a/drivers/raw/afu_mf/he_lbk.h b/drivers/raw/afu_mf/he_lbk.h new file mode 100644 index 0000000..c2e8a29 --- /dev/null +++ b/drivers/raw/afu_mf/he_lbk.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 Intel Corporation + */ + +#ifndef _HE_LBK_H_ +#define _HE_LBK_H_ + +#include "afu_mf_rawdev.h" +#include "rte_pmd_afu.h" + +#define HE_LBK_UUID_L 0xb94b12284c31e02b +#define HE_LBK_UUID_H 0x56e203e9864f49a7 +#define HE_MEM_LBK_UUID_L 0xbb652a578330a8eb +#define HE_MEM_LBK_UUID_H 0x8568ab4e6ba54616 + +extern struct afu_mf_drv he_lbk_drv; +extern struct afu_mf_drv he_mem_lbk_drv; + +/* HE-LBK & HE-MEM-LBK registers definition */ +#define CSR_SCRATCHPAD0 0x100 +#define CSR_SCRATCHPAD1 0x108 +#define CSR_AFU_DSM_BASEL 0x110 +#define CSR_AFU_DSM_BASEH 0x114 +#define CSR_SRC_ADDR 0x120 +#define CSR_DST_ADDR 0x128 +#define CSR_NUM_LINES 0x130 +#define CSR_CTL 0x138 +#define CSR_CFG 0x140 +#define CSR_INACT_THRESH 0x148 +#define CSR_INTERRUPT0 0x150 +#define CSR_SWTEST_MSG 0x158 +#define CSR_STATUS0 0x160 +#define CSR_STATUS1 0x168 +#define CSR_ERROR 0x170 +#define CSR_STRIDE 0x178 +#define CSR_HE_INFO0 0x180 + +#define DSM_SIZE 0x200000 +#define DSM_POLL_INTERVAL 5 /* ms */ +#define DSM_TIMEOUT 1000 /* ms */ + +#define NLB_BUF_SIZE 0x400000 +#define TEST_MEM_ALIGN 1024 + +struct he_lbk_csr_ctl { + union { + uint32_t csr; + struct { + uint32_t reset:1; + uint32_t start:1; + uint32_t 
force_completion:1; + uint32_t reserved:29; + }; + }; +}; + +struct he_lbk_csr_cfg { + union { + uint32_t csr; + struct { + uint32_t rsvd1:1; + uint32_t cont:1; + uint32_t mode:3; + uint32_t multicl_len:2; + uint32_t rsvd2:13; + uint32_t trput_interleave:3; + uint32_t test_cfg:5; + uint32_t interrupt_on_error:1; + uint32_t interrupt_testmode:1; + uint32_t rsvd3:2; + }; + }; +}; + +struct he_lbk_status0 { + union { + uint64_t csr; + struct { + uint32_t num_writes; + uint32_t num_reads; + }; + }; +}; + +struct he_lbk_status1 { + union { + uint64_t csr; + struct { + uint32_t num_pend_writes; + uint32_t num_pend_reads; + }; + }; +}; + +struct he_lbk_dsm_status { + uint32_t test_complete; + uint32_t test_error; + uint64_t num_clocks; + uint32_t num_reads; + uint32_t num_writes; + uint32_t start_overhead; + uint32_t end_overhead; +}; + +struct he_lbk_ctx { + uint8_t *addr; + uint8_t *dsm_ptr; + uint64_t dsm_iova; + uint8_t *src_ptr; + uint64_t src_iova; + uint8_t *dest_ptr; + uint64_t dest_iova; + struct he_lbk_dsm_status *status_ptr; +}; + +struct he_lbk_priv { + struct rte_pmd_afu_he_lbk_cfg he_lbk_cfg; + struct he_lbk_ctx he_lbk_ctx; +}; + +#endif /* _HE_LBK_H_ */ diff --git a/drivers/raw/afu_mf/he_mem.c b/drivers/raw/afu_mf/he_mem.c new file mode 100644 index 0000000..ccbb3a8 --- /dev/null +++ b/drivers/raw/afu_mf/he_mem.c @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "afu_mf_rawdev.h" +#include "he_mem.h" + +static int he_mem_tg_test(struct afu_mf_rawdev *dev) +{ + struct he_mem_tg_priv *priv = NULL; + struct rte_pmd_afu_he_mem_tg_cfg *cfg = NULL; + struct he_mem_tg_ctx *ctx = NULL; + uint64_t value = 0x12345678; + uint64_t cap = 0; + uint64_t channel_mask = 0; + int i, t = 0; + + if (!dev) + return -EINVAL; + + priv = 
(struct he_mem_tg_priv *)dev->priv; + if (!priv) + return -ENOENT; + + cfg = &priv->he_mem_tg_cfg; + ctx = &priv->he_mem_tg_ctx; + + AFU_MF_PMD_DEBUG("Channel mask: 0x%x", cfg->channel_mask); + + rte_write64(value, ctx->addr + MEM_TG_SCRATCHPAD); + cap = rte_read64(ctx->addr + MEM_TG_SCRATCHPAD); + AFU_MF_PMD_DEBUG("Scratchpad value: 0x%"PRIx64, cap); + if (cap != value) { + AFU_MF_PMD_ERR("Test scratchpad register failed"); + return -EIO; + } + + cap = rte_read64(ctx->addr + MEM_TG_CTRL); + AFU_MF_PMD_DEBUG("Capability: 0x%"PRIx64, cap); + + channel_mask = cfg->channel_mask & cap; + /* start traffic generators */ + rte_write64(channel_mask, ctx->addr + MEM_TG_CTRL); + + /* check test status */ + while (t < MEM_TG_TIMEOUT_MS) { + value = rte_read64(ctx->addr + MEM_TG_STAT); + for (i = 0; i < NUM_MEM_TG_CHANNELS; i++) { + if (channel_mask & (1 << i)) { + if (TGACTIVE(value, i)) + continue; + printf("TG channel %d test %s\n", i, + TGPASS(value, i) ? "pass" : + TGTIMEOUT(value, i) ? "timeout" : + TGFAIL(value, i) ? 
"fail" : "error"); + channel_mask &= ~(1 << i); + } + } + if (!channel_mask) + break; + rte_delay_ms(MEM_TG_POLL_INTERVAL_MS); + t += MEM_TG_POLL_INTERVAL_MS; + } + + if (channel_mask) { + AFU_MF_PMD_ERR("Timeout 0x%04lx", (unsigned long)value); + return channel_mask; + } + + return 0; +} + +static int he_mem_tg_init(struct afu_mf_rawdev *dev) +{ + struct he_mem_tg_priv *priv = NULL; + struct he_mem_tg_ctx *ctx = NULL; + + if (!dev) + return -EINVAL; + + priv = (struct he_mem_tg_priv *)dev->priv; + if (!priv) { + priv = rte_zmalloc(NULL, sizeof(struct he_mem_tg_priv), 0); + if (!priv) + return -ENOMEM; + dev->priv = priv; + } + + ctx = &priv->he_mem_tg_ctx; + ctx->addr = (uint8_t *)dev->addr; + + return 0; +} + +static int he_mem_tg_config(struct afu_mf_rawdev *dev, void *config, + size_t config_size) +{ + struct he_mem_tg_priv *priv = NULL; + + if (!dev || !config || !config_size) + return -EINVAL; + + priv = (struct he_mem_tg_priv *)dev->priv; + if (!priv) + return -ENOENT; + + if (config_size != sizeof(struct rte_pmd_afu_he_mem_tg_cfg)) + return -EINVAL; + + rte_memcpy(&priv->he_mem_tg_cfg, config, sizeof(priv->he_mem_tg_cfg)); + + return 0; +} + +static int he_mem_tg_close(struct afu_mf_rawdev *dev) +{ + if (!dev) + return -EINVAL; + + rte_free(dev->priv); + dev->priv = NULL; + + return 0; +} + +static int he_mem_tg_dump(struct afu_mf_rawdev *dev, FILE *f) +{ + struct he_mem_tg_priv *priv = NULL; + struct he_mem_tg_ctx *ctx = NULL; + + if (!dev) + return -EINVAL; + + priv = (struct he_mem_tg_priv *)dev->priv; + if (!priv) + return -ENOENT; + + if (!f) + f = stdout; + + ctx = &priv->he_mem_tg_ctx; + + fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr); + + return 0; +} + +static struct afu_mf_ops he_mem_tg_ops = { + .init = he_mem_tg_init, + .config = he_mem_tg_config, + .start = NULL, + .stop = NULL, + .test = he_mem_tg_test, + .close = he_mem_tg_close, + .dump = he_mem_tg_dump, + .reset = NULL +}; + +struct afu_mf_drv he_mem_tg_drv = { + .uuid = { 
/*
 * MEM-TG registers definition.
 *
 * MEM_TG_STAT packs one 4-bit status field per channel; channel n occupies
 * bits [4n+3:4n]. Within a field, bit 0 is "active", bit 1 "timeout",
 * bit 2 "fail" and bit 3 "pass".
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. TGPASS(v, i & 3)) expand with the intended precedence.
 */
#define MEM_TG_SCRATCHPAD   0x28
#define MEM_TG_CTRL         0x30
#define TGCONTROL(n)        (1 << (n))
#define MEM_TG_STAT         0x38
#define TGSTATUS(v, n)      (((v) >> ((n) << 2)) & 0xf)
#define TGPASS(v, n)        (((v) >> (((n) << 2) + 3)) & 0x1)
#define TGFAIL(v, n)        (((v) >> (((n) << 2) + 2)) & 0x1)
#define TGTIMEOUT(v, n)     (((v) >> (((n) << 2) + 1)) & 0x1)
#define TGACTIVE(v, n)      (((v) >> ((n) << 2)) & 0x1)
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "afu_mf_rawdev.h" +#include "n3000_afu.h" + +static int nlb_afu_config(struct afu_mf_rawdev *dev) +{ + struct n3000_afu_priv *priv = NULL; + struct rte_pmd_afu_nlb_cfg *cfg = NULL; + struct nlb_csr_cfg v; + + if (!dev) + return -EINVAL; + + if (!dev->priv) + return -ENOENT; + + priv = (struct n3000_afu_priv *)dev->priv; + cfg = &priv->nlb_cfg; + + v.csr = 0; + + if (cfg->cont) + v.cont = 1; + + if (cfg->cache_policy == NLB_WRPUSH_I) + v.wrpush_i = 1; + else + v.wrthru_en = cfg->cache_policy; + + if (cfg->cache_hint == NLB_RDLINE_MIXED) + v.rdsel = 3; + else + v.rdsel = cfg->cache_hint; + + v.mode = cfg->mode; + v.chsel = cfg->read_vc; + v.wr_chsel = cfg->write_vc; + v.wrfence_chsel = cfg->wrfence_vc; + v.wrthru_en = cfg->cache_policy; + v.multicl_len = cfg->multi_cl - 1; + + AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr); + rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG); + + return 0; +} + +static void nlb_afu_report(struct afu_mf_rawdev *dev, uint32_t cl) +{ + struct n3000_afu_priv *priv = NULL; + struct rte_pmd_afu_nlb_cfg *cfg = NULL; + struct nlb_dsm_status *stat = NULL; + uint64_t ticks = 0; + double num, rd_bw, wr_bw; + + if (!dev || !dev->priv) + return; + + priv = (struct n3000_afu_priv *)dev->priv; + + cfg = &priv->nlb_cfg; + stat = priv->nlb_ctx.status_ptr; + + if (cfg->cont) + ticks = stat->num_clocks - stat->start_overhead; + else + ticks = stat->num_clocks - + (stat->start_overhead + stat->end_overhead); + + if (cfg->freq_mhz == 0) + cfg->freq_mhz = 200; + + num = (double)stat->num_reads; + rd_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks; + num = (double)stat->num_writes; + wr_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks; + + printf("Cachelines Read_Count Write_Count Clocks@%uMHz " + "Rd_Bandwidth Wr_Bandwidth\n", cfg->freq_mhz); + 
printf("%10u %10u %11u %12lu %7.3f GB/s %7.3f GB/s\n", cl, + stat->num_reads, stat->num_writes, ticks, + rd_bw / 1e9, wr_bw / 1e9); +} + +static int nlb_afu_test(struct afu_mf_rawdev *dev) +{ + struct n3000_afu_priv *priv = NULL; + struct nlb_afu_ctx *ctx = NULL; + struct rte_pmd_afu_nlb_cfg *cfg = NULL; + struct nlb_csr_ctl ctl; + uint32_t *ptr = NULL; + uint32_t i, j, cl, val = 0; + uint64_t sval = 0; + int ret = 0; + + if (!dev) + return -EINVAL; + + if (!dev->priv) + return -ENOENT; + + priv = (struct n3000_afu_priv *)dev->priv; + ctx = &priv->nlb_ctx; + cfg = &priv->nlb_cfg; + + /* initialize registers */ + AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova); + rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL); + + ctl.csr = 0; + rte_write32(ctl.csr, ctx->addr + CSR_CTL); + ctl.reset = 1; + rte_write32(ctl.csr, ctx->addr + CSR_CTL); + + AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova); + rte_write64(CACHE_LINE_ALIGNED(ctx->src_iova), + ctx->addr + CSR_SRC_ADDR); + AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova); + rte_write64(CACHE_LINE_ALIGNED(ctx->dest_iova), + ctx->addr + CSR_DST_ADDR); + + ret = nlb_afu_config(dev); + if (ret) + return ret; + + /* initialize src data */ + ptr = (uint32_t *)ctx->src_ptr; + j = CACHE_LINE_SIZE(cfg->end) >> 2; + for (i = 0; i < j; i++) + *ptr++ = i; + + /* start test */ + for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) { + memset(ctx->dest_ptr, 0, CACHE_LINE_SIZE(cl)); + memset(ctx->dsm_ptr, 0, DSM_SIZE); + + ctl.csr = 0; + rte_write32(ctl.csr, ctx->addr + CSR_CTL); + ctl.reset = 1; + rte_write32(ctl.csr, ctx->addr + CSR_CTL); + + rte_write32(cl, ctx->addr + CSR_NUM_LINES); + + rte_delay_us(10); + + ctl.start = 1; + rte_write32(ctl.csr, ctx->addr + CSR_CTL); + + if (cfg->cont) { + rte_delay_ms(cfg->timeout * 1000); + ctl.force_completion = 1; + rte_write32(ctl.csr, ctx->addr + CSR_CTL); + ret = dsm_poll_timeout(&ctx->status_ptr->test_complete, + val, (val & 0x1) == 1, DSM_POLL_INTERVAL, + 
DSM_TIMEOUT); + if (ret) { + printf("DSM poll timeout\n"); + goto end; + } + } else { + ret = dsm_poll_timeout(&ctx->status_ptr->test_complete, + val, (val & 0x1) == 1, DSM_POLL_INTERVAL, + DSM_TIMEOUT); + if (ret) { + printf("DSM poll timeout\n"); + goto end; + } + ctl.force_completion = 1; + rte_write32(ctl.csr, ctx->addr + CSR_CTL); + } + + nlb_afu_report(dev, cl); + + i = 0; + while (i++ < 100) { + sval = rte_read64(ctx->addr + CSR_STATUS1); + if (sval == 0) + break; + rte_delay_us(1000); + } + + ptr = (uint32_t *)ctx->dest_ptr; + j = CACHE_LINE_SIZE(cl) >> 2; + for (i = 0; i < j; i++) { + if (*ptr++ != i) { + AFU_MF_PMD_ERR("Data mismatch @ %u", i); + break; + } + } + } + +end: + return ret; +} + +static void dma_afu_buf_free(struct dma_afu_ctx *ctx) +{ + int i = 0; + + if (!ctx) + return; + + for (i = 0; i < NUM_DMA_BUF; i++) { + rte_free(ctx->dma_buf[i]); + ctx->dma_buf[i] = NULL; + } + + rte_free(ctx->data_buf); + ctx->data_buf = NULL; + + rte_free(ctx->ref_buf); + ctx->ref_buf = NULL; +} + +static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx, + struct rte_pmd_afu_dma_cfg *cfg) +{ + size_t page_sz = sysconf(_SC_PAGE_SIZE); + int i, ret = 0; + + if (!ctx || !cfg) + return -EINVAL; + + for (i = 0; i < NUM_DMA_BUF; i++) { + ctx->dma_buf[i] = (uint64_t *)rte_zmalloc(NULL, cfg->size, + TEST_MEM_ALIGN); + if (!ctx->dma_buf[i]) { + ret = -ENOMEM; + goto free; + } + ctx->dma_iova[i] = rte_malloc_virt2iova(ctx->dma_buf[i]); + if (ctx->dma_iova[i] == RTE_BAD_IOVA) { + ret = -ENOMEM; + goto free; + } + } + + ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz); + if (!ctx->data_buf) { + ret = -ENOMEM; + goto free; + } + + ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz); + if (!ctx->ref_buf) { + ret = -ENOMEM; + goto free; + } + + return 0; + +free: + dma_afu_buf_free(ctx); + return ret; +} + +static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size) +{ + int *ptr = NULL; + size_t i = 0; + size_t dword_size = 0; + + if (!ctx || !size) + return; + + 
ptr = (int *)ctx->ref_buf; + + if (ctx->pattern) { + memset(ptr, ctx->pattern, size); + } else { + srand(99); + dword_size = size >> 2; + for (i = 0; i < dword_size; i++) + *ptr++ = rand(); + } + rte_memcpy(ctx->data_buf, ctx->ref_buf, size); +} + +static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size) +{ + uint8_t *src = NULL; + uint8_t *dst = NULL; + size_t i = 0; + int n = 0; + + if (!ctx || !size) + return -EINVAL; + + src = (uint8_t *)ctx->ref_buf; + dst = (uint8_t *)ctx->data_buf; + + if (memcmp(src, dst, size)) { + printf("Transfer is corrupted\n"); + if (ctx->verbose) { + for (i = 0; i < size; i++) { + if (*src != *dst) { + if (++n >= ERR_CHECK_LIMIT) + break; + printf("Mismatch at 0x%zx, " + "Expected %02x Actual %02x\n", + i, *src, *dst); + } + src++; + dst++; + } + if (n < ERR_CHECK_LIMIT) { + printf("Found %d error bytes\n", n); + } else { + printf("......\n"); + printf("Found more than %d error bytes\n", n); + } + } + return -1; + } + + printf("Transfer is verified\n"); + return 0; +} + +static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes) +{ + uint64_t qwords = bytes / sizeof(uint64_t); + + if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) || + !IS_ALIGNED_QWORD((uint64_t)bytes)) + return; + + for (; qwords > 0; qwords--, host_addr++, dev_addr++) + rte_write64(*host_addr, dev_addr); +} + +static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes) +{ + uint64_t qwords = bytes / sizeof(uint64_t); + + if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) || + !IS_ALIGNED_QWORD((uint64_t)bytes)) + return; + + for (; qwords > 0; qwords--, host_addr++, dev_addr++) + *host_addr = rte_read64(dev_addr); +} + +static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr) +{ + uint64_t requested_page = addr & ~DMA_ASE_WINDOW_MASK; + + if (!ctx) + return; + + if (requested_page != ctx->cur_ase_page) { + rte_write64(requested_page, ctx->ase_ctrl_addr); + ctx->cur_ase_page = requested_page; + } +} + +static int 
ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr, + uint64_t host_addr, uint32_t count) +{ + uint64_t dev_aligned_addr = 0; + uint64_t shift = 0; + uint64_t val = 0; + + AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr, + dev_addr, count); + + if (!ctx || (count >= QWORD_BYTES)) + return -EINVAL; + + if (!count) + return 0; + + switch_ase_page(ctx, dev_addr); + + shift = dev_addr % QWORD_BYTES; + dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK; + val = rte_read64(ctx->ase_data_addr + dev_aligned_addr); + rte_memcpy(((char *)(&val)) + shift, (void *)host_addr, count); + + /* write back to device */ + rte_write64(val, ctx->ase_data_addr + dev_aligned_addr); + + return 0; +} + +static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr, + uint64_t *src_ptr, uint64_t *count) +{ + uint64_t src = *src_ptr; + uint64_t dst = *dst_ptr; + uint64_t align_bytes = *count; + uint64_t offset = 0; + uint64_t left_in_page = DMA_ASE_WINDOW; + uint64_t size_to_copy = 0; + + AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst, + align_bytes); + + if (!ctx || !IS_ALIGNED_DWORD(dst)) + return -EINVAL; + + if (align_bytes < DWORD_BYTES) + return 0; + + if (!IS_ALIGNED_QWORD(dst)) { + /* Write out a single DWORD to get QWORD aligned */ + switch_ase_page(ctx, dst); + offset = dst & DMA_ASE_WINDOW_MASK; + + rte_write32(*(uint32_t *)src, ctx->ase_data_addr + offset); + src += DWORD_BYTES; + dst += DWORD_BYTES; + align_bytes -= DWORD_BYTES; + } + + if (!align_bytes) + return 0; + + /* Write out blocks of 64-bit values */ + while (align_bytes >= QWORD_BYTES) { + left_in_page -= dst & DMA_ASE_WINDOW_MASK; + size_to_copy = + MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1))); + if (size_to_copy < QWORD_BYTES) + break; + switch_ase_page(ctx, dst); + offset = dst & DMA_ASE_WINDOW_MASK; + blk_write64((uint64_t *)(ctx->ase_data_addr + offset), + (uint64_t *)src, size_to_copy); + src += size_to_copy; + dst += size_to_copy; + 
align_bytes -= size_to_copy; + } + + if (align_bytes >= DWORD_BYTES) { + /* Write out remaining DWORD */ + switch_ase_page(ctx, dst); + offset = dst & DMA_ASE_WINDOW_MASK; + rte_write32(*(uint32_t *)src, ctx->ase_data_addr + offset); + src += DWORD_BYTES; + dst += DWORD_BYTES; + align_bytes -= DWORD_BYTES; + } + + *src_ptr = src; + *dst_ptr = dst; + *count = align_bytes; + + return 0; +} + +static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr, + uint64_t *src_ptr, uint64_t count) +{ + uint64_t dst = *dst_ptr; + uint64_t src = *src_ptr; + uint64_t count_left = count; + uint64_t unaligned_size = 0; + int ret = 0; + + AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst, + count); + + /* aligns address to 8 byte using dst masking method */ + if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) { + unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES); + if (unaligned_size > count_left) + unaligned_size = count_left; + ret = ase_write_unaligned(ctx, dst, src, unaligned_size); + if (ret) + return ret; + count_left -= unaligned_size; + src += unaligned_size; + dst += unaligned_size; + } + + /* Handles 8/4 byte MMIO transfer */ + ret = ase_write(ctx, &dst, &src, &count_left); + if (ret) + return ret; + + /* Left over unaligned bytes transferred using dst masking method */ + unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES); + if (unaligned_size > count_left) + unaligned_size = count_left; + + ret = ase_write_unaligned(ctx, dst, src, unaligned_size); + if (ret) + return ret; + + count_left -= unaligned_size; + *dst_ptr = dst + unaligned_size; + *src_ptr = src + unaligned_size; + + return 0; +} + +static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr, + uint64_t host_addr, uint32_t count) +{ + uint64_t dev_aligned_addr = 0; + uint64_t shift = 0; + uint64_t val = 0; + + AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr, + dev_addr, count); + + if (!ctx || (count >= QWORD_BYTES)) + return -EINVAL; + + if 
(!count) + return 0; + + switch_ase_page(ctx, dev_addr); + + shift = dev_addr % QWORD_BYTES; + dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK; + val = rte_read64(ctx->ase_data_addr + dev_aligned_addr); + rte_memcpy((void *)host_addr, ((char *)(&val)) + shift, count); + + return 0; +} + +static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr, + uint64_t *dst_ptr, uint64_t *count) +{ + uint64_t src = *src_ptr; + uint64_t dst = *dst_ptr; + uint64_t align_bytes = *count; + uint64_t offset = 0; + uint64_t left_in_page = DMA_ASE_WINDOW; + uint64_t size_to_copy = 0; + + AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")", dst, src, + align_bytes); + + if (!ctx || !IS_ALIGNED_DWORD(src)) + return -EINVAL; + + if (align_bytes < DWORD_BYTES) + return 0; + + if (!IS_ALIGNED_QWORD(src)) { + /* Read a single DWORD to get QWORD aligned */ + switch_ase_page(ctx, src); + offset = src & DMA_ASE_WINDOW_MASK; + *(uint32_t *)dst = rte_read32(ctx->ase_data_addr + offset); + src += DWORD_BYTES; + dst += DWORD_BYTES; + align_bytes -= DWORD_BYTES; + } + + if (!align_bytes) + return 0; + + /* Read blocks of 64-bit values */ + while (align_bytes >= QWORD_BYTES) { + left_in_page -= src & DMA_ASE_WINDOW_MASK; + size_to_copy = + MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1))); + if (size_to_copy < QWORD_BYTES) + break; + switch_ase_page(ctx, src); + offset = src & DMA_ASE_WINDOW_MASK; + blk_read64((uint64_t *)(ctx->ase_data_addr + offset), + (uint64_t *)dst, size_to_copy); + src += size_to_copy; + dst += size_to_copy; + align_bytes -= size_to_copy; + } + + if (align_bytes >= DWORD_BYTES) { + /* Read remaining DWORD */ + switch_ase_page(ctx, src); + offset = src & DMA_ASE_WINDOW_MASK; + *(uint32_t *)dst = rte_read32(ctx->ase_data_addr + offset); + src += DWORD_BYTES; + dst += DWORD_BYTES; + align_bytes -= DWORD_BYTES; + } + + *src_ptr = src; + *dst_ptr = dst; + *count = align_bytes; + + return 0; +} + +static int ase_fpga_to_host(struct dma_afu_ctx *ctx, 
uint64_t *src_ptr, + uint64_t *dst_ptr, uint64_t count) +{ + uint64_t src = *src_ptr; + uint64_t dst = *dst_ptr; + uint64_t count_left = count; + uint64_t unaligned_size = 0; + int ret = 0; + + AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst, + count); + + /* Aligns address to 8 byte using src masking method */ + if (!IS_ALIGNED_DWORD(src) && !IS_ALIGNED_QWORD(src)) { + unaligned_size = QWORD_BYTES - (src % QWORD_BYTES); + if (unaligned_size > count_left) + unaligned_size = count_left; + ret = ase_read_unaligned(ctx, src, dst, unaligned_size); + if (ret) + return ret; + count_left -= unaligned_size; + dst += unaligned_size; + src += unaligned_size; + } + + /* Handles 8/4 byte MMIO transfer */ + ret = ase_read(ctx, &src, &dst, &count_left); + if (ret) + return ret; + + /* Left over unaligned bytes transferred using src masking method */ + unaligned_size = QWORD_BYTES - (src % QWORD_BYTES); + if (unaligned_size > count_left) + unaligned_size = count_left; + + ret = ase_read_unaligned(ctx, src, dst, unaligned_size); + if (ret) + return ret; + + count_left -= unaligned_size; + *dst_ptr = dst + unaligned_size; + *src_ptr = src + unaligned_size; + + return 0; +} + +static void clear_interrupt(struct dma_afu_ctx *ctx) +{ + /* clear interrupt by writing 1 to IRQ bit in status register */ + msgdma_status status; + + if (!ctx) + return; + + status.csr = 0; + status.irq = 1; + rte_write32(status.csr, CSR_STATUS(ctx->csr_addr)); +} + +static int poll_interrupt(struct dma_afu_ctx *ctx) +{ + struct pollfd pfd = {0}; + uint64_t count = 0; + ssize_t bytes_read = 0; + int poll_ret = 0; + int ret = 0; + + if (!ctx || (ctx->event_fd < 0)) + return -EINVAL; + + pfd.fd = ctx->event_fd; + pfd.events = POLLIN; + poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC); + if (poll_ret < 0) { + AFU_MF_PMD_ERR("Error %s", strerror(errno)); + ret = -EFAULT; + goto out; + } else if (poll_ret == 0) { + AFU_MF_PMD_ERR("Timeout"); + ret = -ETIME; + } else { + bytes_read = read(pfd.fd, 
&count, sizeof(count)); + if (bytes_read > 0) { + if (ctx->verbose) + AFU_MF_PMD_DEBUG("Successful, ret %d, cnt %"PRIu64, + poll_ret, count); + ret = 0; + } else { + AFU_MF_PMD_ERR("Failed %s", bytes_read > 0 ? + strerror(errno) : "zero bytes read"); + ret = -EIO; + } + } +out: + clear_interrupt(ctx); + return ret; +} + +static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc) +{ + msgdma_status status; + uint64_t fpga_queue_full = 0; + + if (!ctx) + return; + + if (ctx->verbose) { + AFU_MF_PMD_DEBUG("descriptor.rd_address = 0x%x%08x", + desc->rd_address_ext, desc->rd_address); + AFU_MF_PMD_DEBUG("descriptor.wr_address = 0x%x%08x", + desc->wr_address_ext, desc->wr_address); + AFU_MF_PMD_DEBUG("descriptor.len = %u", desc->len); + AFU_MF_PMD_DEBUG("descriptor.wr_burst_count = %u", + desc->wr_burst_count); + AFU_MF_PMD_DEBUG("descriptor.rd_burst_count = %u", + desc->rd_burst_count); + AFU_MF_PMD_DEBUG("descriptor.wr_stride %u", desc->wr_stride); + AFU_MF_PMD_DEBUG("descriptor.rd_stride %u", desc->rd_stride); + } + + do { + status.csr = rte_read32(CSR_STATUS(ctx->csr_addr)); + if (fpga_queue_full++ > 100000000) { + AFU_MF_PMD_DEBUG("DMA queue full retry"); + fpga_queue_full = 0; + } + } while (status.desc_buf_full); + + blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc, + sizeof(*desc)); +} + +static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src, + int count, int is_last_desc, fpga_dma_type type, int intr_en) +{ + msgdma_ext_desc *desc = NULL; + int alignment_offset = 0; + int segment_size = 0; + + if (!ctx) + return -EINVAL; + + /* src, dst and count must be 64-byte aligned */ + if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) || + !IS_DMA_ALIGNED(count)) + return -EINVAL; + memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc)); + + /* these fields are fixed for all DMA transfers */ + desc = ctx->desc_buf; + desc->seq_num = 0; + desc->wr_stride = 1; + desc->rd_stride = 1; + desc->control.go = 1; + if (intr_en) + 
desc->control.transfer_irq_en = 1; + else + desc->control.transfer_irq_en = 0; + + if (!is_last_desc) + desc->control.early_done_en = 1; + else + desc->control.early_done_en = 0; + + if (type == FPGA_TO_FPGA) { + desc->rd_address = src & DMA_MASK_32_BIT; + desc->wr_address = dst & DMA_MASK_32_BIT; + desc->len = count; + desc->wr_burst_count = 4; + desc->rd_burst_count = 4; + desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT; + desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT; + send_descriptor(ctx, desc); + } else { + /* check CCIP (host) address is aligned to 4CL (256B) */ + alignment_offset = (type == HOST_TO_FPGA) + ? (src % CCIP_ALIGN_BYTES) : (dst % CCIP_ALIGN_BYTES); + /* performing a short transfer to get aligned */ + if (alignment_offset != 0) { + desc->rd_address = src & DMA_MASK_32_BIT; + desc->wr_address = dst & DMA_MASK_32_BIT; + desc->wr_burst_count = 1; + desc->rd_burst_count = 1; + desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT; + desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT; + /* count isn't large enough to hit next 4CL boundary */ + if ((CCIP_ALIGN_BYTES - alignment_offset) >= count) { + segment_size = count; + count = 0; + } else { + segment_size = CCIP_ALIGN_BYTES + - alignment_offset; + src += segment_size; + dst += segment_size; + count -= segment_size; + desc->control.transfer_irq_en = 0; + } + /* post short transfer to align to a 4CL (256 byte) */ + desc->len = segment_size; + send_descriptor(ctx, desc); + } + /* at this point we are 4CL (256 byte) aligned */ + if (count >= CCIP_ALIGN_BYTES) { + desc->rd_address = src & DMA_MASK_32_BIT; + desc->wr_address = dst & DMA_MASK_32_BIT; + desc->wr_burst_count = 4; + desc->rd_burst_count = 4; + desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT; + desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT; + /* buffer ends on 4CL boundary */ + if ((count % CCIP_ALIGN_BYTES) == 0) { + segment_size = count; + count = 0; + } else { + segment_size = count + - (count % CCIP_ALIGN_BYTES); 
+ src += segment_size; + dst += segment_size; + count -= segment_size; + desc->control.transfer_irq_en = 0; + } + desc->len = segment_size; + send_descriptor(ctx, desc); + } + /* post short transfer to handle the remainder */ + if (count > 0) { + desc->rd_address = src & DMA_MASK_32_BIT; + desc->wr_address = dst & DMA_MASK_32_BIT; + desc->len = count; + desc->wr_burst_count = 1; + desc->rd_burst_count = 1; + desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT; + desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT; + if (intr_en) + desc->control.transfer_irq_en = 1; + send_descriptor(ctx, desc); + } + } + + return 0; +} + +static int issue_magic(struct dma_afu_ctx *ctx) +{ + *(ctx->magic_buf) = 0ULL; + return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova), + DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1); +} + +static void wait_magic(struct dma_afu_ctx *ctx) +{ + int magic_timeout = 0; + + if (!ctx) + return; + + poll_interrupt(ctx); + while (*(ctx->magic_buf) != DMA_WF_MAGIC) { + if (magic_timeout++ > 1000) { + AFU_MF_PMD_ERR("DMA magic operation timeout"); + magic_timeout = 0; + break; + } + } + *(ctx->magic_buf) = 0ULL; +} + +static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src, + uint64_t chunk, int is_last_chunk, int *intr_issued) +{ + int intr_en = 0; + int ret = 0; + + if (!ctx || !intr_issued) + return -EINVAL; + + src += chunk * ctx->dma_buf_size; + dst += chunk * ctx->dma_buf_size; + + if (((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1)) || is_last_chunk) { + if (*intr_issued) { + ret = poll_interrupt(ctx); + if (ret) + return ret; + } + intr_en = 1; + } + + chunk %= NUM_DMA_BUF; + rte_memcpy(ctx->dma_buf[chunk], (void *)src, ctx->dma_buf_size); + ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[chunk]), + ctx->dma_buf_size, 0, HOST_TO_FPGA, intr_en); + if (intr_en) + *intr_issued = 1; + + return ret; +} + +static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src, + size_t count) +{ + uint64_t i = 0; + uint64_t 
count_left = count; + uint64_t aligned_addr = 0; + uint64_t align_bytes = 0; + uint64_t dma_chunks = 0; + uint64_t dma_tx_bytes = 0; + uint64_t offset = 0; + int issued_intr = 0; + int ret = 0; + + AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%"PRIx64")", src, dst, + count); + + if (!ctx) + return -EINVAL; + + if (!IS_DMA_ALIGNED(dst)) { + if (count_left < DMA_ALIGN_BYTES) + return ase_host_to_fpga(ctx, &dst, &src, count_left); + + aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1) + * DMA_ALIGN_BYTES; + align_bytes = aligned_addr - dst; + ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes); + if (ret) + return ret; + count_left = count_left - align_bytes; + } + + if (count_left) { + dma_chunks = count_left / ctx->dma_buf_size; + offset = dma_chunks * ctx->dma_buf_size; + count_left -= offset; + AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64 + " (%"PRIu64"...0x%"PRIx64")", + src, dst, dma_chunks, count_left); + for (i = 0; i < dma_chunks; i++) { + ret = dma_tx_buf(ctx, dst, src, i, + i == (dma_chunks - 1), &issued_intr); + if (ret) + return ret; + } + + if (issued_intr) { + ret = poll_interrupt(ctx); + if (ret) + return ret; + } + + if (count_left) { + i = count_left / DMA_ALIGN_BYTES; + if (i > 0) { + dma_tx_bytes = i * DMA_ALIGN_BYTES; + AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA", + dma_tx_bytes); + rte_memcpy(ctx->dma_buf[0], + (void *)(src + offset), + dma_tx_bytes); + ret = do_dma(ctx, dst + offset, + DMA_HOST_ADDR(ctx->dma_iova[0]), + dma_tx_bytes, 1, HOST_TO_FPGA, 1); + if (ret) + return ret; + ret = poll_interrupt(ctx); + if (ret) + return ret; + } + + count_left -= dma_tx_bytes; + if (count_left) { + AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE", + count_left); + dst += offset + dma_tx_bytes; + src += offset + dma_tx_bytes; + ret = ase_host_to_fpga(ctx, &dst, &src, + count_left); + } + } + } + + return ret; +} + +static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src, + uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int 
*wf_issued) +{ + uint64_t i = chunk % NUM_DMA_BUF; + uint64_t n = *rx_count; + uint64_t num_pending = 0; + int ret = 0; + + if (!ctx || !wf_issued) + return -EINVAL; + + ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]), + src + chunk * ctx->dma_buf_size, + ctx->dma_buf_size, 1, FPGA_TO_HOST, 0); + if (ret) + return ret; + + num_pending = chunk - n + 1; + if (num_pending == HALF_DMA_BUF) { + ret = issue_magic(ctx); + if (ret) { + AFU_MF_PMD_DEBUG("Magic issue failed"); + return ret; + } + *wf_issued = 1; + } + + if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) { + if (*wf_issued) { + wait_magic(ctx); + for (i = 0; i < HALF_DMA_BUF; i++) { + rte_memcpy((void *)(dst + + n * ctx->dma_buf_size), + ctx->dma_buf[n % NUM_DMA_BUF], + ctx->dma_buf_size); + n++; + } + *wf_issued = 0; + *rx_count = n; + } + ret = issue_magic(ctx); + if (ret) { + AFU_MF_PMD_DEBUG("Magic issue failed"); + return ret; + } + *wf_issued = 1; + } + + return ret; +} + +static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src, + size_t count) +{ + uint64_t i = 0; + uint64_t count_left = count; + uint64_t aligned_addr = 0; + uint64_t align_bytes = 0; + uint64_t dma_chunks = 0; + uint64_t pending_buf = 0; + uint64_t dma_rx_bytes = 0; + uint64_t offset = 0; + int wf_issued = 0; + int ret = 0; + + AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%"PRIx64")", src, dst, + count); + + if (!ctx) + return -EINVAL; + + if (!IS_DMA_ALIGNED(src)) { + if (count_left < DMA_ALIGN_BYTES) + return ase_fpga_to_host(ctx, &src, &dst, count_left); + + aligned_addr = ((src / DMA_ALIGN_BYTES) + 1) + * DMA_ALIGN_BYTES; + align_bytes = aligned_addr - src; + ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes); + if (ret) + return ret; + count_left = count_left - align_bytes; + } + + if (count_left) { + dma_chunks = count_left / ctx->dma_buf_size; + offset = dma_chunks * ctx->dma_buf_size; + count_left -= offset; + AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64 + " (%"PRIu64"...0x%"PRIx64")", + 
src, dst, dma_chunks, count_left); + for (i = 0; i < dma_chunks; i++) { + ret = dma_rx_buf(ctx, dst, src, i, + i == (dma_chunks - 1), + &pending_buf, &wf_issued); + if (ret) + return ret; + } + + if (wf_issued) + wait_magic(ctx); + + /* clear out final dma memcpy operations */ + while (pending_buf < dma_chunks) { + /* constant size transfer; no length check required */ + rte_memcpy((void *)(dst + + pending_buf * ctx->dma_buf_size), + ctx->dma_buf[pending_buf % NUM_DMA_BUF], + ctx->dma_buf_size); + pending_buf++; + } + + if (count_left > 0) { + i = count_left / DMA_ALIGN_BYTES; + if (i > 0) { + dma_rx_bytes = i * DMA_ALIGN_BYTES; + AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA", + dma_rx_bytes); + ret = do_dma(ctx, + DMA_HOST_ADDR(ctx->dma_iova[0]), + src + offset, + dma_rx_bytes, 1, FPGA_TO_HOST, 0); + if (ret) + return ret; + ret = issue_magic(ctx); + if (ret) + return ret; + wait_magic(ctx); + rte_memcpy((void *)(dst + offset), + ctx->dma_buf[0], dma_rx_bytes); + } + + count_left -= dma_rx_bytes; + if (count_left) { + AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE", + count_left); + dst += offset + dma_rx_bytes; + src += offset + dma_rx_bytes; + ret = ase_fpga_to_host(ctx, &src, &dst, + count_left); + } + } + } + + return ret; +} + +static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src, + size_t count) +{ + uint64_t i = 0; + uint64_t count_left = count; + uint64_t dma_chunks = 0; + uint64_t offset = 0; + uint32_t tx_chunks = 0; + uint64_t *tmp_buf = NULL; + int ret = 0; + + AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%"PRIx64")", src, dst, + count); + + if (!ctx) + return -EINVAL; + + if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src) + && IS_DMA_ALIGNED(count_left)) { + dma_chunks = count_left / ctx->dma_buf_size; + offset = dma_chunks * ctx->dma_buf_size; + count_left -= offset; + AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64 + " (%"PRIu64"...0x%"PRIx64")", + src, dst, dma_chunks, count_left); + for (i = 0; i < dma_chunks; i++) { + 
ret = do_dma(ctx, dst + i * ctx->dma_buf_size, + src + i * ctx->dma_buf_size, + ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0); + if (ret) + return ret; + if ((((i + 1) % NUM_DMA_BUF) == 0) || + (i == (dma_chunks - 1))) { + ret = issue_magic(ctx); + if (ret) + return ret; + wait_magic(ctx); + } + } + + if (count_left > 0) { + AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA", count_left); + ret = do_dma(ctx, dst + offset, src + offset, + count_left, 1, FPGA_TO_FPGA, 0); + if (ret) + return ret; + ret = issue_magic(ctx); + if (ret) + return ret; + wait_magic(ctx); + } + } else { + if ((src < dst) && (src + count_left > dst)) { + AFU_MF_PMD_ERR("Overlapping: 0x%"PRIx64 + " -> 0x%"PRIx64" (0x%"PRIx64")", + src, dst, count_left); + return -EINVAL; + } + tx_chunks = count_left / ctx->dma_buf_size; + offset = tx_chunks * ctx->dma_buf_size; + count_left -= offset; + AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64 + " (%u...0x%"PRIx64")", + src, dst, tx_chunks, count_left); + /* unaligned path: bounce every chunk through a host buffer */ + tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size, + DMA_ALIGN_BYTES); + /* without the bounce buffer the copies below would target address 0 */ + if (!tmp_buf) + return -ENOMEM; + for (i = 0; i < tx_chunks; i++) { + ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf, + src + i * ctx->dma_buf_size, + ctx->dma_buf_size); + if (ret) + goto free_buf; + ret = dma_host_to_fpga(ctx, + dst + i * ctx->dma_buf_size, + (uint64_t)tmp_buf, ctx->dma_buf_size); + if (ret) + goto free_buf; + } + + if (count_left > 0) { + ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf, + src + offset, count_left); + if (ret) + goto free_buf; + ret = dma_host_to_fpga(ctx, dst + offset, + (uint64_t)tmp_buf, count_left); + if (ret) + goto free_buf; + } +free_buf: + rte_free(tmp_buf); + } + + return ret; +} + +static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst, + uint64_t src, size_t count, fpga_dma_type type) +{ + int ret = 0; + + if (!ctx) + return -EINVAL; + + if (type == HOST_TO_FPGA) + ret = dma_host_to_fpga(ctx, dst, src, count); + else if (type == FPGA_TO_HOST) + ret = dma_fpga_to_host(ctx, dst, src, count); + else if (type ==
FPGA_TO_FPGA) + ret = dma_fpga_to_fpga(ctx, dst, src, count); + else + return -EINVAL; + + return ret; +} + +static double getTime(struct timespec start, struct timespec end) +{ + uint64_t diff = 1000000000L * (end.tv_sec - start.tv_sec) + + end.tv_nsec - start.tv_nsec; + return (double)diff / (double)1000000000L; +} + +#define SWEEP_ITERS 1 +static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length, + uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement) +{ + struct timespec start, end; + uint64_t test_size = 0; + uint64_t *dma_buf_ptr = NULL; + double throughput, total_time = 0.0; + int i = 0; + int ret = 0; + + if (!ctx || !ctx->data_buf || !ctx->ref_buf) { + AFU_MF_PMD_ERR("Buffer for DMA test is not allocated"); + return -EINVAL; + } + + if (length < (buf_offset + size_decrement)) { + AFU_MF_PMD_ERR("Test length does not match unaligned parameter"); + return -EINVAL; + } + test_size = length - (buf_offset + size_decrement); + if ((ddr_offset + test_size) > ctx->mem_size) { + AFU_MF_PMD_ERR("Test is out of DDR memory space"); + return -EINVAL; + } + + dma_buf_ptr = (uint64_t *)((uint64_t)ctx->data_buf + buf_offset); + printf("Sweep Host %p to FPGA 0x%"PRIx64 + " with 0x%"PRIx64" bytes ...\n", + (void *)dma_buf_ptr, ddr_offset, test_size); + + for (i = 0; i < SWEEP_ITERS; i++) { + clock_gettime(CLOCK_MONOTONIC, &start); + ret = dma_transfer_sync(ctx, ddr_offset, (uint64_t)dma_buf_ptr, + test_size, HOST_TO_FPGA); + clock_gettime(CLOCK_MONOTONIC, &end); + if (ret) { + AFU_MF_PMD_ERR("Failed"); + return ret; + } + total_time += getTime(start, end); + } + throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000); + printf("Measured bandwidth = %lf MB/s\n", throughput); + + printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n", + ddr_offset, (void *)dma_buf_ptr, test_size); + + total_time = 0.0; + memset((char *)dma_buf_ptr, 0, test_size); + for (i = 0; i < SWEEP_ITERS; i++) { + clock_gettime(CLOCK_MONOTONIC, &start); + ret = 
dma_transfer_sync(ctx, (uint64_t)dma_buf_ptr, ddr_offset, + test_size, FPGA_TO_HOST); + clock_gettime(CLOCK_MONOTONIC, &end); + if (ret) { + AFU_MF_PMD_ERR("Failed"); + return ret; + } + total_time += getTime(start, end); + } + throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000); + printf("Measured bandwidth = %lf MB/s\n", throughput); + + printf("Verifying buffer ...\n"); + return dma_afu_buf_verify(ctx, test_size); +} + +static int dma_afu_test(struct afu_mf_rawdev *dev) +{ + struct n3000_afu_priv *priv = NULL; + struct dma_afu_ctx *ctx = NULL; + struct rte_pmd_afu_dma_cfg *cfg = NULL; + msgdma_ctrl ctrl; + uint64_t offset = 0; + uint32_t i = 0; + int ret = 0; + + if (!dev) + return -EINVAL; + + if (!dev->priv) + return -ENOENT; + + priv = (struct n3000_afu_priv *)dev->priv; + cfg = &priv->dma_cfg; + if (cfg->index >= NUM_N3000_DMA) + return -EINVAL; + ctx = &priv->dma_ctx[cfg->index]; + + ctx->pattern = (int)cfg->pattern; + ctx->verbose = (int)cfg->verbose; + ctx->dma_buf_size = cfg->size; + + ret = dma_afu_buf_alloc(ctx, cfg); + if (ret) + goto free; + + printf("Initialize test buffer\n"); + dma_afu_buf_init(ctx, cfg->length); + + /* enable interrupt */ + ctrl.csr = 0; + ctrl.global_intr_en_mask = 1; + rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr)); + + printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf, + cfg->offset, cfg->length); + ret = dma_transfer_sync(ctx, cfg->offset, (uint64_t)ctx->data_buf, + cfg->length, HOST_TO_FPGA); + if (ret) { + AFU_MF_PMD_ERR("Failed to transfer data from host to FPGA"); + goto end; + } + memset(ctx->data_buf, 0, cfg->length); + + printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset, + ctx->data_buf, cfg->length); + ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, cfg->offset, + cfg->length, FPGA_TO_HOST); + if (ret) { + AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host"); + goto end; + } + ret = dma_afu_buf_verify(ctx, cfg->length); + if (ret) + goto end; + + if ((cfg->offset + 
cfg->length * 2) <= ctx->mem_size) + offset = cfg->offset + cfg->length; + else if (cfg->offset > cfg->length) + offset = 0; + else + goto end; + + printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n", + cfg->offset, offset, cfg->length); + ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length, + FPGA_TO_FPGA); + if (ret) { + AFU_MF_PMD_ERR("Failed to transfer data from FPGA to FPGA"); + goto end; + } + + printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset, + ctx->data_buf, cfg->length); + ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, offset, + cfg->length, FPGA_TO_HOST); + if (ret) { + AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host"); + goto end; + } + ret = dma_afu_buf_verify(ctx, cfg->length); + if (ret) + goto end; + + printf("Sweep with aligned address and size\n"); + ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0); + if (ret) + goto end; + + if (cfg->unaligned) { + printf("Sweep with unaligned address and size\n"); + struct unaligned_set { + uint64_t addr_offset; + uint64_t size_dec; + } param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}}; + for (i = 0; i < ARRAY_SIZE(param); i++) { + ret = sweep_test(ctx, cfg->length, cfg->offset, + param[i].addr_offset, param[i].size_dec); + if (ret) + break; + } + } + +end: + /* disable interrupt */ + ctrl.global_intr_en_mask = 0; + rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr)); + +free: + dma_afu_buf_free(ctx); + return ret; +} + +static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_mf_rawdev *dev) +{ + struct rte_afu_device *afudev = NULL; + + if (!dev || !dev->rawdev || !dev->rawdev->device) + return NULL; + + afudev = RTE_DEV_TO_AFU(dev->rawdev->device); + if (!afudev->rawdev || !afudev->rawdev->device) + return NULL; + + return RTE_DEV_TO_PCI(afudev->rawdev->device); +} + +static int dma_afu_set_irqs(struct afu_mf_rawdev *dev, uint32_t vec_start, + uint32_t count, int *efds) +{ + struct rte_pci_device *pci_dev = NULL; + struct vfio_irq_set *irq_set 
= NULL; + int vfio_dev_fd = 0; + size_t sz = 0; + int ret = 0; + + if (!dev || !efds || (count == 0) || (count > MAX_MSIX_VEC)) + return -EINVAL; + + pci_dev = n3000_afu_get_pci_dev(dev); + if (!pci_dev) + return -ENODEV; + vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle); + + sz = sizeof(*irq_set) + sizeof(*efds) * count; + irq_set = rte_zmalloc(NULL, sz, 0); + if (!irq_set) + return -ENOMEM; + + irq_set->argsz = (uint32_t)sz; + irq_set->count = count; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | + VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = vec_start; + + rte_memcpy(&irq_set->data, efds, sizeof(*efds) * count); + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + if (ret) + AFU_MF_PMD_ERR("Error enabling MSI-X interrupts\n"); + + rte_free(irq_set); + return ret; +} + +static void *n3000_afu_get_port_addr(struct afu_mf_rawdev *dev) +{ + struct rte_pci_device *pci_dev = NULL; + uint8_t *addr = NULL; + uint64_t val = 0; + uint32_t bar = 0; + + pci_dev = n3000_afu_get_pci_dev(dev); + if (!pci_dev) + return NULL; + + addr = (uint8_t *)pci_dev->mem_resource[0].addr; + val = rte_read64(addr + PORT_ATTR_REG(dev->port)); + if (!PORT_IMPLEMENTED(val)) { + AFU_MF_PMD_INFO("FIU port %d is not implemented", dev->port); + return NULL; + } + + bar = PORT_BAR(val); + if (bar >= PCI_MAX_RESOURCE) { + AFU_MF_PMD_ERR("BAR index %u is out of limit", bar); + return NULL; + } + + addr = (uint8_t *)pci_dev->mem_resource[bar].addr + PORT_OFFSET(val); + return addr; +} + +static int n3000_afu_get_irq_capability(struct afu_mf_rawdev *dev, + uint32_t *vec_start, uint32_t *vec_count) +{ + uint8_t *addr = NULL; + uint64_t val = 0; + uint64_t header = 0; + uint64_t next_offset = 0; + + addr = (uint8_t *)n3000_afu_get_port_addr(dev); + if (!addr) + return -ENOENT; + + do { + addr += next_offset; + header = rte_read64(addr); + if ((DFH_TYPE(header) == DFH_TYPE_PRIVATE) && + (DFH_FEATURE_ID(header) == PORT_FEATURE_UINT_ID)) { + val 
= rte_read64(addr + PORT_UINT_CAP_REG); + if (vec_start) + *vec_start = PORT_VEC_START(val); + if (vec_count) + *vec_count = PORT_VEC_COUNT(val); + return 0; + } + next_offset = DFH_NEXT_OFFSET(header); + if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0)) + break; + } while (!DFH_EOL(header)); + + return -ENOENT; +} + +static int nlb_afu_ctx_release(struct afu_mf_rawdev *dev) +{ + struct n3000_afu_priv *priv = NULL; + struct nlb_afu_ctx *ctx = NULL; + + if (!dev) + return -EINVAL; + + priv = (struct n3000_afu_priv *)dev->priv; + if (!priv) + return -ENOENT; + + ctx = &priv->nlb_ctx; + + rte_free(ctx->dsm_ptr); + ctx->dsm_ptr = NULL; + ctx->status_ptr = NULL; + + rte_free(ctx->src_ptr); + ctx->src_ptr = NULL; + + rte_free(ctx->dest_ptr); + ctx->dest_ptr = NULL; + + return 0; +} + +static int nlb_afu_ctx_init(struct afu_mf_rawdev *dev, uint8_t *addr) +{ + struct n3000_afu_priv *priv = NULL; + struct nlb_afu_ctx *ctx = NULL; + int ret = 0; + + if (!dev || !addr) + return -EINVAL; + + priv = (struct n3000_afu_priv *)dev->priv; + if (!priv) + return -ENOENT; + + ctx = &priv->nlb_ctx; + ctx->addr = addr; + + ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN); + if (!ctx->dsm_ptr) { + ret = -ENOMEM; + goto release; + } + ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr); + if (ctx->dsm_iova == RTE_BAD_IOVA) { + ret = -ENOMEM; + goto release; + } + + ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE, + TEST_MEM_ALIGN); + if (!ctx->src_ptr) { + ret = -ENOMEM; + goto release; + } + ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr); + if (ctx->src_iova == RTE_BAD_IOVA) { + ret = -ENOMEM; + goto release; + } + + ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE, + TEST_MEM_ALIGN); + if (!ctx->dest_ptr) { + ret = -ENOMEM; + goto release; + } + ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr); + if (ctx->dest_iova == RTE_BAD_IOVA) { + ret = -ENOMEM; + goto release; + } + + ctx->status_ptr = (struct nlb_dsm_status *)(ctx->dsm_ptr 
+ DSM_STATUS); + return 0; + +release: + nlb_afu_ctx_release(dev); + return ret; +} + +/* Free the descriptor and magic buffers of every DMA context and close the eventfd they share; returns 0, -EINVAL without a device or -ENOENT without private data. */ +static int dma_afu_ctx_release(struct afu_mf_rawdev *dev) +{ + struct n3000_afu_priv *priv = NULL; + struct dma_afu_ctx *ctx = NULL; + int event_fd = 0; + int i = 0; + + if (!dev) + return -EINVAL; + + priv = (struct n3000_afu_priv *)dev->priv; + if (!priv) + return -ENOENT; + + /* all contexts are handed the eventfd that was created for context 0 */ + event_fd = priv->dma_ctx[0].event_fd; + + /* release every context, not only the first, so nothing leaks */ + for (i = 0; i < NUM_N3000_DMA; i++) { + ctx = &priv->dma_ctx[i]; + + rte_free(ctx->desc_buf); + ctx->desc_buf = NULL; + + rte_free(ctx->magic_buf); + ctx->magic_buf = NULL; + + /* back to the zero-initialised state so a second release is harmless */ + ctx->event_fd = 0; + } + + /* 0 is the never-created state of the zeroed private data; closing it would close stdin */ + if (event_fd > 0) + close(event_fd); + + return 0; +} + +static int dma_afu_ctx_init(struct afu_mf_rawdev *dev, int index, uint8_t *addr) +{ + struct n3000_afu_priv *priv = NULL; + struct dma_afu_ctx *ctx = NULL; + uint64_t mem_sz[] = {0x100000000, 0x100000000, 0x40000000, 0x1000000}; + static int efds[1] = {0}; + uint32_t vec_start = 0; + int ret = 0; + + if (!dev || (index < 0) || (index >= NUM_N3000_DMA) || !addr) + return -EINVAL; + + priv = (struct n3000_afu_priv *)dev->priv; + if (!priv) + return -ENOENT; + + ctx = &priv->dma_ctx[index]; + ctx->index = index; + ctx->addr = addr; + ctx->csr_addr = addr + DMA_CSR; + ctx->desc_addr = addr + DMA_DESC; + ctx->ase_ctrl_addr = addr + DMA_ASE_CTRL; + ctx->ase_data_addr = addr + DMA_ASE_DATA; + ctx->mem_size = mem_sz[ctx->index]; + ctx->cur_ase_page = INVALID_ASE_PAGE; + if (ctx->index == 0) { + ret = n3000_afu_get_irq_capability(dev, &vec_start, NULL); + if (ret) + return ret; + + efds[0] = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (efds[0] < 0) { + AFU_MF_PMD_ERR("eventfd create failed"); + return -EBADF; + } + + if (dma_afu_set_irqs(dev, vec_start, 1, efds)) + AFU_MF_PMD_ERR("DMA interrupt setup failed"); + } + ctx->event_fd = efds[0]; + + ctx->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL, + sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES); + if (!ctx->desc_buf) { + ret = -ENOMEM; + goto release; + } + + ctx->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE, + TEST_MEM_ALIGN); + if (!ctx->magic_buf) { + ret = -ENOMEM; + goto release; + } + ctx->magic_iova =
rte_malloc_virt2iova(ctx->magic_buf); + if (ctx->magic_iova == RTE_BAD_IOVA) { + ret = -ENOMEM; + goto release; + } + + return 0; + +release: + dma_afu_ctx_release(dev); + return ret; +} + +static int n3000_afu_ctx_init(struct afu_mf_rawdev *dev) +{ + struct n3000_afu_priv *priv = NULL; + uint8_t *addr = NULL; + uint64_t header = 0; + uint64_t uuid_hi = 0; + uint64_t uuid_lo = 0; + uint64_t next_offset = 0; + int ret = 0; + + if (!dev) + return -EINVAL; + + priv = (struct n3000_afu_priv *)dev->priv; + if (!priv) + return -ENOENT; + + addr = (uint8_t *)dev->addr; + do { + addr += next_offset; + header = rte_read64(addr); + uuid_lo = rte_read64(addr + DFH_UUID_L_OFFSET); + uuid_hi = rte_read64(addr + DFH_UUID_H_OFFSET); + + if ((DFH_TYPE(header) == DFH_TYPE_AFU) && + (uuid_lo == N3000_NLB0_UUID_L) && + (uuid_hi == N3000_NLB0_UUID_H)) { + AFU_MF_PMD_INFO("AFU NLB0 found @ %p", (void *)addr); + ret = nlb_afu_ctx_init(dev, addr); + if (ret) + return ret; + } else if ((DFH_TYPE(header) == DFH_TYPE_BBB) && + (uuid_lo == N3000_DMA_UUID_L) && + (uuid_hi == N3000_DMA_UUID_H) && + (priv->num_dma < NUM_N3000_DMA)) { + AFU_MF_PMD_INFO("AFU DMA%d found @ %p", + priv->num_dma, (void *)addr); + ret = dma_afu_ctx_init(dev, priv->num_dma, addr); + if (ret) + return ret; + priv->num_dma++; + } else { + AFU_MF_PMD_DEBUG("DFH: type %"PRIu64 + ", uuid %016"PRIx64"%016"PRIx64, + DFH_TYPE(header), uuid_hi, uuid_lo); + } + + next_offset = DFH_NEXT_OFFSET(header); + if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0)) + break; + } while (!DFH_EOL(header)); + + return 0; +} + +static int n3000_afu_init(struct afu_mf_rawdev *dev) +{ + if (!dev) + return -EINVAL; + + if (!dev->priv) { + dev->priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0); + if (!dev->priv) + return -ENOMEM; + } + + return n3000_afu_ctx_init(dev); +} + +static int n3000_afu_config(struct afu_mf_rawdev *dev, void *config, + size_t config_size) +{ + struct n3000_afu_priv *priv = NULL; + struct 
rte_pmd_afu_n3000_cfg *cfg = NULL; + int i = 0; + uint64_t top = 0; + + if (!dev || !config || !config_size) + return -EINVAL; + + priv = (struct n3000_afu_priv *)dev->priv; + if (!priv) + return -ENOENT; + + if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg)) + return -EINVAL; + + cfg = (struct rte_pmd_afu_n3000_cfg *)config; + if (cfg->type == RTE_PMD_AFU_N3000_NLB) { + if (cfg->nlb_cfg.mode != NLB_MODE_LPBK) + return -EINVAL; + if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) || + (cfg->nlb_cfg.write_vc > NLB_VC_RANDOM)) + return -EINVAL; + if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1) + return -EINVAL; + if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED) + return -EINVAL; + if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I) + return -EINVAL; + if ((cfg->nlb_cfg.multi_cl != 1) && + (cfg->nlb_cfg.multi_cl != 2) && + (cfg->nlb_cfg.multi_cl != 4)) + return -EINVAL; + if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) || + (cfg->nlb_cfg.begin > MAX_CACHE_LINES)) + return -EINVAL; + if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) || + (cfg->nlb_cfg.end > MAX_CACHE_LINES)) + return -EINVAL; + rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg, + sizeof(struct rte_pmd_afu_nlb_cfg)); + } else if (cfg->type == RTE_PMD_AFU_N3000_DMA) { + if (cfg->dma_cfg.index >= NUM_N3000_DMA) + return -EINVAL; + i = cfg->dma_cfg.index; + if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size) + return -EINVAL; + if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size) + return -EINVAL; + /* add in 64 bits: both fields are 32-bit and their sum may wrap */ + top = (uint64_t)cfg->dma_cfg.length + cfg->dma_cfg.offset; + if ((top == 0) || (top > priv->dma_ctx[i].mem_size)) + return -EINVAL; + if (i == 3) { /* QDR connected to DMA3 */ + if (cfg->dma_cfg.length & 0x3f) { + cfg->dma_cfg.length &= ~0x3f; + AFU_MF_PMD_INFO("Round size to %x for QDR", + cfg->dma_cfg.length); + } + } + rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg, + sizeof(struct rte_pmd_afu_dma_cfg)); + } else { + AFU_MF_PMD_ERR("Invalid type of N3000 AFU"); + return -EINVAL; + } + + priv->cfg_type = cfg->type; + return 0; +} + +static int
n3000_afu_test(struct afu_mf_rawdev *dev) +{ + struct n3000_afu_priv *priv = NULL; + int ret = 0; + + if (!dev) + return -EINVAL; + + if (!dev->priv) + return -ENOENT; + + priv = (struct n3000_afu_priv *)dev->priv; + + if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) { + AFU_MF_PMD_INFO("Test NLB"); + ret = nlb_afu_test(dev); + } else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) { + AFU_MF_PMD_INFO("Test DMA%u", priv->dma_cfg.index); + ret = dma_afu_test(dev); + } else { + AFU_MF_PMD_ERR("Please configure AFU before test"); + ret = -EINVAL; + } + + return ret; +} + +static int n3000_afu_close(struct afu_mf_rawdev *dev) +{ + if (!dev) + return -EINVAL; + + nlb_afu_ctx_release(dev); + dma_afu_ctx_release(dev); + + rte_free(dev->priv); + dev->priv = NULL; + + return 0; +} + +static int n3000_afu_dump(struct afu_mf_rawdev *dev, FILE *f) +{ + struct n3000_afu_priv *priv = NULL; + + if (!dev) + return -EINVAL; + + priv = (struct n3000_afu_priv *)dev->priv; + if (!priv) + return -ENOENT; + + if (!f) + f = stdout; + + if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) { + struct nlb_afu_ctx *ctx = &priv->nlb_ctx; + fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr); + fprintf(f, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr); + fprintf(f, "dsm_iova:\t%p\n", (void *)ctx->dsm_iova); + fprintf(f, "src_ptr:\t%p\n", (void *)ctx->src_ptr); + fprintf(f, "src_iova:\t%p\n", (void *)ctx->src_iova); + fprintf(f, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr); + fprintf(f, "dest_iova:\t%p\n", (void *)ctx->dest_iova); + fprintf(f, "status_ptr:\t%p\n", (void *)ctx->status_ptr); + } else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) { + struct dma_afu_ctx *ctx = &priv->dma_ctx[priv->dma_cfg.index]; + fprintf(f, "index:\t\t%d\n", ctx->index); + fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr); + fprintf(f, "csr_addr:\t%p\n", (void *)ctx->csr_addr); + fprintf(f, "desc_addr:\t%p\n", (void *)ctx->desc_addr); + fprintf(f, "ase_ctrl_addr:\t%p\n", (void *)ctx->ase_ctrl_addr); + fprintf(f, "ase_data_addr:\t%p\n", (void 
*)ctx->ase_data_addr); + fprintf(f, "desc_buf:\t%p\n", (void *)ctx->desc_buf); + fprintf(f, "magic_buf:\t%p\n", (void *)ctx->magic_buf); + fprintf(f, "magic_iova:\t%p\n", (void *)ctx->magic_iova); + } else { + return -EINVAL; + } + + return 0; +} + +static int n3000_afu_reset(struct afu_mf_rawdev *dev) +{ + uint8_t *addr = NULL; + uint64_t val = 0; + + addr = (uint8_t *)n3000_afu_get_port_addr(dev); + if (!addr) + return -ENOENT; + + val = rte_read64(addr + PORT_CTRL_REG); + val |= PORT_SOFT_RESET; + rte_write64(val, addr + PORT_CTRL_REG); + rte_delay_us(100); + val &= ~PORT_SOFT_RESET; + rte_write64(val, addr + PORT_CTRL_REG); + + return 0; +} + +static struct afu_mf_ops n3000_afu_ops = { + .init = n3000_afu_init, + .config = n3000_afu_config, + .start = NULL, + .stop = NULL, + .test = n3000_afu_test, + .close = n3000_afu_close, + .dump = n3000_afu_dump, + .reset = n3000_afu_reset +}; + +struct afu_mf_drv n3000_afu_drv = { + .uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H }, + .ops = &n3000_afu_ops +}; diff --git a/drivers/raw/afu_mf/n3000_afu.h b/drivers/raw/afu_mf/n3000_afu.h new file mode 100644 index 0000000..38104ac --- /dev/null +++ b/drivers/raw/afu_mf/n3000_afu.h @@ -0,0 +1,333 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2022 Intel Corporation + */ + +#ifndef _N3000_AFU_H_ +#define _N3000_AFU_H_ + +#include "afu_mf_rawdev.h" +#include "rte_pmd_afu.h" + +#define N3000_AFU_UUID_L 0xc000c9660d824272 +#define N3000_AFU_UUID_H 0x9aeffe5f84570612 +#define N3000_NLB0_UUID_L 0xf89e433683f9040b +#define N3000_NLB0_UUID_H 0xd8424dc4a4a3c413 +#define N3000_DMA_UUID_L 0xa9149a35bace01ea +#define N3000_DMA_UUID_H 0xef82def7f6ec40fc + +extern struct afu_mf_drv n3000_afu_drv; + +#define NUM_N3000_DMA 4 +#define MAX_MSIX_VEC 7 + +/* N3000 DFL definition */ +#define DFH_UUID_L_OFFSET 8 +#define DFH_UUID_H_OFFSET 16 +#define DFH_TYPE(hdr) (((hdr) >> 60) & 0xf) +#define DFH_TYPE_AFU 1 +#define DFH_TYPE_BBB 2 +#define DFH_TYPE_PRIVATE 3 +#define DFH_EOL(hdr) 
(((hdr) >> 40) & 0x1) +#define DFH_NEXT_OFFSET(hdr) (((hdr) >> 16) & 0xffffff) +#define DFH_FEATURE_ID(hdr) ((hdr) & 0xfff) +#define PORT_ATTR_REG(n) (((n) << 3) + 0x38) +#define PORT_IMPLEMENTED(attr) (((attr) >> 60) & 0x1) +#define PORT_BAR(attr) (((attr) >> 32) & 0x7) +#define PORT_OFFSET(attr) ((attr) & 0xffffff) +#define PORT_FEATURE_UINT_ID 0x12 +#define PORT_UINT_CAP_REG 0x8 +/* DFL port UINT capability: bits [23:12] first vector, bits [11:0] vector count */ +#define PORT_VEC_START(cap) (((cap) >> 12) & 0xfff) +#define PORT_VEC_COUNT(cap) ((cap) & 0xfff) +#define PORT_CTRL_REG 0x38 +#define PORT_SOFT_RESET (0x1 << 0) + +/* NLB registers definition */ +#define CSR_SCRATCHPAD0 0x100 +#define CSR_SCRATCHPAD1 0x108 +#define CSR_AFU_DSM_BASEL 0x110 +#define CSR_AFU_DSM_BASEH 0x114 +#define CSR_SRC_ADDR 0x120 +#define CSR_DST_ADDR 0x128 +#define CSR_NUM_LINES 0x130 +#define CSR_CTL 0x138 +#define CSR_CFG 0x140 +#define CSR_INACT_THRESH 0x148 +#define CSR_INTERRUPT0 0x150 +#define CSR_SWTEST_MSG 0x158 +#define CSR_STATUS0 0x160 +#define CSR_STATUS1 0x168 +#define CSR_ERROR 0x170 +#define CSR_STRIDE 0x178 +#define CSR_HE_INFO0 0x180 + +#define DSM_SIZE 0x200000 +#define DSM_STATUS 0x40 +#define DSM_POLL_INTERVAL 5 /* ms */ +#define DSM_TIMEOUT 1000 /* ms */ + +#define NLB_BUF_SIZE 0x400000 +#define TEST_MEM_ALIGN 1024 + +struct nlb_csr_ctl { + union { + uint32_t csr; + struct { + uint32_t reset:1; + uint32_t start:1; + uint32_t force_completion:1; + uint32_t reserved:29; + }; + }; +}; + +struct nlb_csr_cfg { + union { + uint32_t csr; + struct { + uint32_t wrthru_en:1; + uint32_t cont:1; + uint32_t mode:3; + uint32_t multicl_len:2; + uint32_t rsvd1:1; + uint32_t delay_en:1; + uint32_t rdsel:2; + uint32_t rsvd2:1; + uint32_t chsel:3; + uint32_t rsvd3:1; + uint32_t wrpush_i:1; + uint32_t wr_chsel:3; + uint32_t rsvd4:3; + uint32_t test_cfg:5; + uint32_t interrupt_on_error:1; + uint32_t interrupt_testmode:1; + uint32_t wrfence_chsel:2; + }; + }; +}; + +struct nlb_status0 { + union { + uint64_t csr; + struct { + uint32_t num_writes; + uint32_t
num_reads; + }; + }; +}; + +struct nlb_status1 { + union { + uint64_t csr; + struct { + uint32_t num_pend_writes; + uint32_t num_pend_reads; + }; + }; +}; + +struct nlb_dsm_status { + uint32_t test_complete; + uint32_t test_error; + uint64_t num_clocks; + uint32_t num_reads; + uint32_t num_writes; + uint32_t start_overhead; + uint32_t end_overhead; +}; + +/* DMA registers definition */ +#define DMA_CSR 0x40 +#define DMA_DESC 0x60 +#define DMA_ASE_CTRL 0x200 +#define DMA_ASE_DATA 0x1000 + +#define DMA_ASE_WINDOW 4096 +#define DMA_ASE_WINDOW_MASK ((uint64_t)(DMA_ASE_WINDOW - 1)) +#define INVALID_ASE_PAGE 0xffffffffffffffffULL + +#define DMA_WF_MAGIC 0x5772745F53796E63ULL +#define DMA_WF_MAGIC_ROM 0x1000000000000 +#define DMA_HOST_ADDR(addr) ((addr) | 0x2000000000000) +#define DMA_WF_HOST_ADDR(addr) ((addr) | 0x3000000000000) + +#define NUM_DMA_BUF 8 +#define HALF_DMA_BUF (NUM_DMA_BUF / 2) + +#define DMA_MASK_32_BIT 0xFFFFFFFF + +#define DMA_CSR_BUSY 0x1 +#define DMA_DESC_BUFFER_EMPTY 0x2 +#define DMA_DESC_BUFFER_FULL 0x4 + +#define DWORD_BYTES 4 +#define IS_ALIGNED_DWORD(addr) (((addr) % DWORD_BYTES) == 0) + +#define QWORD_BYTES 8 +#define IS_ALIGNED_QWORD(addr) (((addr) % QWORD_BYTES) == 0) + +#define DMA_ALIGN_BYTES 64 +#define IS_DMA_ALIGNED(addr) (((addr) % DMA_ALIGN_BYTES) == 0) + +#define CCIP_ALIGN_BYTES (DMA_ALIGN_BYTES << 2) + +#define DMA_TIMEOUT_MSEC 5000 + +#define MAGIC_BUF_SIZE 64 +#define ERR_CHECK_LIMIT 64 + +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +typedef enum { + HOST_TO_FPGA = 0, + FPGA_TO_HOST, + FPGA_TO_FPGA, + FPGA_MAX_TRANSFER_TYPE, +} fpga_dma_type; + +typedef union { + uint32_t csr; + struct { + uint32_t tx_channel:8; + uint32_t generate_sop:1; + uint32_t generate_eop:1; + uint32_t park_reads:1; + uint32_t park_writes:1; + uint32_t end_on_eop:1; + uint32_t reserved_1:1; + uint32_t transfer_irq_en:1; + uint32_t early_term_irq_en:1; + uint32_t trans_error_irq_en:8; + uint32_t early_done_en:1; + uint32_t reserved_2:6; + uint32_t go:1; + }; +} msgdma_desc_ctrl; + +typedef struct __rte_packed { + uint32_t rd_address; + uint32_t wr_address; + uint32_t len; + uint16_t seq_num; + uint8_t rd_burst_count; + uint8_t wr_burst_count; + uint16_t rd_stride; + uint16_t wr_stride; + uint32_t rd_address_ext; + uint32_t wr_address_ext; + msgdma_desc_ctrl control; +} msgdma_ext_desc; + +typedef union { + uint32_t csr; + struct { + uint32_t busy:1; + uint32_t desc_buf_empty:1; + uint32_t desc_buf_full:1; + uint32_t rsp_buf_empty:1; + uint32_t rsp_buf_full:1; + uint32_t stopped:1; + uint32_t resetting:1; + uint32_t stopped_on_errror:1; + uint32_t stopped_on_early_term:1; + uint32_t irq:1; + uint32_t reserved:22; + }; +} msgdma_status; + +typedef union { + uint32_t csr; + struct { + uint32_t stop_dispatcher:1; + uint32_t reset_dispatcher:1; + uint32_t stop_on_error:1; + uint32_t stopped_on_early_term:1; + uint32_t global_intr_en_mask:1; + uint32_t stop_descriptors:1; + uint32_t reserved:22; + }; +} msgdma_ctrl; + +typedef union { + uint32_t csr; + struct { + uint32_t rd_fill_level:16; + uint32_t wr_fill_level:16; + }; +} msgdma_fill_level; + +typedef union { + uint32_t csr; + struct { + uint32_t rsp_fill_level:16; + uint32_t reserved:16; + }; +} msgdma_rsp_level; + +typedef union { + uint32_t csr; + struct { + uint32_t rd_seq_num:16; + uint32_t wr_seq_num:16; + }; +} msgdma_seq_num; + +typedef struct __rte_packed { + 
msgdma_status status; + msgdma_ctrl ctrl; + msgdma_fill_level fill_level; + msgdma_rsp_level rsp; + msgdma_seq_num seq_num; +} msgdma_csr; + +#define CSR_STATUS(csr) (&(((msgdma_csr *)(csr))->status)) /* address of the status member of the msgdma_csr at csr */ +#define CSR_CONTROL(csr) (&(((msgdma_csr *)(csr))->ctrl)) /* address of the ctrl member of the msgdma_csr at csr */ + +struct nlb_afu_ctx { /* NOTE(review): each xxx_ptr member is paired with an xxx_iova member, presumably CPU and IO addresses of one buffer - confirm in n3000_afu.c */ + uint8_t *addr; + uint8_t *dsm_ptr; + uint64_t dsm_iova; + uint8_t *src_ptr; + uint64_t src_iova; + uint8_t *dest_ptr; + uint64_t dest_iova; + struct nlb_dsm_status *status_ptr; +}; + +struct dma_afu_ctx { /* DMA state; n3000_afu_priv holds an array of NUM_N3000_DMA of these */ + int index; + uint8_t *addr; + uint8_t *csr_addr; + uint8_t *desc_addr; + uint8_t *ase_ctrl_addr; + uint8_t *ase_data_addr; + uint64_t mem_size; + uint64_t cur_ase_page; + int event_fd; + int verbose; + int pattern; + void *data_buf; + void *ref_buf; + msgdma_ext_desc *desc_buf; + uint64_t *magic_buf; + uint64_t magic_iova; + uint32_t dma_buf_size; + uint64_t *dma_buf[NUM_DMA_BUF]; /* NUM_DMA_BUF (8) entries, same count as dma_iova */ + uint64_t dma_iova[NUM_DMA_BUF]; +}; + +struct n3000_afu_priv { /* bundles both configuration structs with the NLB context and the DMA context array */ + struct rte_pmd_afu_nlb_cfg nlb_cfg; + struct rte_pmd_afu_dma_cfg dma_cfg; + struct nlb_afu_ctx nlb_ctx; + struct dma_afu_ctx dma_ctx[NUM_N3000_DMA]; + int num_dma; + int cfg_type; /* NOTE(review): presumably RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA, like rte_pmd_afu_n3000_cfg.type - confirm */ +}; + +#endif /* _N3000_AFU_H_ */ diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h new file mode 100644 index 0000000..89d866a --- /dev/null +++ b/drivers/raw/afu_mf/rte_pmd_afu.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2022 Intel Corporation + */ + +#ifndef __RTE_PMD_AFU_H__ +#define __RTE_PMD_AFU_H__ + +/** + * @file rte_pmd_afu.h + * + * AFU PMD specific definitions. 
+ * + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +#define RTE_PMD_AFU_N3000_NLB 1 +#define RTE_PMD_AFU_N3000_DMA 2 + +#define NLB_MODE_LPBK 0 +#define NLB_MODE_READ 1 +#define NLB_MODE_WRITE 2 +#define NLB_MODE_TRPUT 3 + +#define NLB_VC_AUTO 0 +#define NLB_VC_VL0 1 +#define NLB_VC_VH0 2 +#define NLB_VC_VH1 3 +#define NLB_VC_RANDOM 4 + +#define NLB_WRLINE_M 0 +#define NLB_WRLINE_I 1 +#define NLB_WRPUSH_I 2 + +#define NLB_RDLINE_S 0 +#define NLB_RDLINE_I 1 +#define NLB_RDLINE_MIXED 2 + +#define MIN_CACHE_LINES 1 +#define MAX_CACHE_LINES 1024 + +#define MIN_DMA_BUF_SIZE 64 +#define MAX_DMA_BUF_SIZE (1023 * 1024) + +/** + * NLB AFU configuration data structure. + */ +struct rte_pmd_afu_nlb_cfg { + uint32_t mode; + uint32_t begin; + uint32_t end; + uint32_t multi_cl; + uint32_t cont; + uint32_t timeout; + uint32_t cache_policy; + uint32_t cache_hint; + uint32_t read_vc; + uint32_t write_vc; + uint32_t wrfence_vc; + uint32_t freq_mhz; +}; + +/** + * DMA AFU configuration data structure. + */ +struct rte_pmd_afu_dma_cfg { + uint32_t index; /* index of DMA controller */ + uint32_t length; /* total length of data to DMA */ + uint32_t offset; /* address offset of target memory */ + uint32_t size; /* size of transfer buffer */ + uint32_t pattern; /* data pattern to fill in test buffer */ + uint32_t unaligned; /* use unaligned address or length in sweep test */ + uint32_t verbose; /* enable verbose error information in test */ +}; + +/** + * N3000 AFU configuration data structure. + */ +struct rte_pmd_afu_n3000_cfg { + int type; /* RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA */ + union { + struct rte_pmd_afu_nlb_cfg nlb_cfg; + struct rte_pmd_afu_dma_cfg dma_cfg; + }; +}; + +/** + * HE-LBK & HE-MEM-LBK AFU configuration data structure. 
+ */ +struct rte_pmd_afu_he_lbk_cfg { /* first six members (mode through timeout) have the same names and types as in rte_pmd_afu_nlb_cfg */ + uint32_t mode; + uint32_t begin; + uint32_t end; + uint32_t multi_cl; + uint32_t cont; + uint32_t timeout; + uint32_t trput_interleave; + uint32_t freq_mhz; +}; + +/** + * HE-MEM-TG AFU configuration data structure. + */ +struct rte_pmd_afu_he_mem_tg_cfg { + uint32_t channel_mask; /* mask of traffic generator channel */ +}; + +/** + * HE-HSSI AFU configuration data structure. + */ +struct rte_pmd_afu_he_hssi_cfg { + uint32_t port; + uint32_t timeout; + uint32_t num_packets; + uint32_t random_length; + uint32_t packet_length; + uint32_t random_payload; + uint32_t rnd_seed[3]; /* three 32-bit values */ + uint64_t src_addr; + uint64_t dest_addr; + int he_loopback; /* NOTE(review): the only signed member; accepted values are not visible in this header - confirm in he_hssi.c */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* __RTE_PMD_AFU_H__ */ diff --git a/drivers/raw/afu_mf/version.map b/drivers/raw/afu_mf/version.map new file mode 100644 index 0000000..c2e0723 --- /dev/null +++ b/drivers/raw/afu_mf/version.map @@ -0,0 +1,3 @@ +DPDK_22 { + local: *; +}; diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build index 05e7de1..c3627f7 100644 --- a/drivers/raw/meson.build +++ b/drivers/raw/meson.build @@ -6,6 +6,7 @@ if is_windows endif drivers = [ + 'afu_mf', 'cnxk_bphy', 'cnxk_gpio', 'dpaa2_cmdif', -- 1.8.3.1