DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH v1] raw/afu_mf: introduce AFU MF device driver
@ 2022-05-17  6:29 Wei Huang
  2022-05-17  7:34 ` [PATCH v2] " Wei Huang
  0 siblings, 1 reply; 57+ messages in thread
From: Wei Huang @ 2022-05-17  6:29 UTC (permalink / raw)
  To: dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, rosen.xu, tianfei.zhang, qi.z.zhang, Wei Huang

Add the afu_mf driver to manage the various AFUs (Acceleration Function
Units) in an FPGA.

Signed-off-by: Wei Huang <wei.huang@intel.com>
Acked-by: Tianfei Zhang <tianfei.zhang@intel.com>
---
 drivers/raw/afu_mf/afu_mf_rawdev.c |  440 ++++++++
 drivers/raw/afu_mf/afu_mf_rawdev.h |   89 ++
 drivers/raw/afu_mf/he_hssi.c       |  369 +++++++
 drivers/raw/afu_mf/he_hssi.h       |  102 ++
 drivers/raw/afu_mf/he_lbk.c        |  429 ++++++++
 drivers/raw/afu_mf/he_lbk.h        |  121 +++
 drivers/raw/afu_mf/he_mem.c        |  181 ++++
 drivers/raw/afu_mf/he_mem.h        |   40 +
 drivers/raw/afu_mf/meson.build     |    8 +
 drivers/raw/afu_mf/n3000_afu.c     | 1997 ++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
 drivers/raw/afu_mf/rte_pmd_afu.h   |  134 +++
 drivers/raw/afu_mf/version.map     |    3 +
 drivers/raw/meson.build            |    1 +
 14 files changed, 4247 insertions(+)
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
 create mode 100644 drivers/raw/afu_mf/he_hssi.c
 create mode 100644 drivers/raw/afu_mf/he_hssi.h
 create mode 100644 drivers/raw/afu_mf/he_lbk.c
 create mode 100644 drivers/raw/afu_mf/he_lbk.h
 create mode 100644 drivers/raw/afu_mf/he_mem.c
 create mode 100644 drivers/raw/afu_mf/he_mem.h
 create mode 100644 drivers/raw/afu_mf/meson.build
 create mode 100644 drivers/raw/afu_mf/n3000_afu.c
 create mode 100644 drivers/raw/afu_mf/n3000_afu.h
 create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h
 create mode 100644 drivers/raw/afu_mf/version.map

diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c
new file mode 100644
index 0000000..f24c748
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
@@ -0,0 +1,440 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_rawdev_pmd.h>
+
+#include "rte_pmd_afu.h"
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+#include "he_lbk.h"
+#include "he_mem.h"
+#include "he_hssi.h"
+
+#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
+
+/*
+ * UUIDs (low half, high half) of the AFUs handled by this driver, ended by
+ * an all-zero entry. Installed as the id_table of afu_mf_pmd_drv.
+ */
+static const struct rte_afu_uuid afu_uuid_map[] = {
+	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
+	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+	{ HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+	{ HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+	{ 0, 0 /* sentinel */ }
+};
+
+/*
+ * NULL-terminated list of the per-AFU drivers; afu_mf_ops_get() walks it to
+ * find the ops whose UUID matches a probed AFU.
+ */
+static struct afu_mf_drv *afu_table[] = {
+	&n3000_afu_drv,
+	&he_lbk_drv,
+	&he_mem_lbk_drv,
+	&he_mem_tg_drv,
+	&he_hssi_drv,
+	NULL
+};
+
+/*
+ * Try to take the device lock kept in the shared data, without blocking.
+ *
+ * Returns 0 when the lock was acquired, -ENODEV when dev or its shared data
+ * is missing, and -EBUSY when the lock is already held.
+ */
+static inline int afu_mf_trylock(struct afu_mf_rawdev *dev)
+{
+	int32_t expected = 0;
+
+	if (!dev || !dev->shared)
+		return -ENODEV;
+
+	/*
+	 * Strong compare-exchange on purpose: this is a single attempt with no
+	 * retry loop, so a spurious failure of the weak form would be reported
+	 * to the caller as a busy device.
+	 */
+	if (!__atomic_compare_exchange_n(&dev->shared->lock, &expected, 1,
+				0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * Release the device lock taken by afu_mf_trylock(). Does nothing when dev
+ * or its shared data is missing.
+ */
+static inline void afu_mf_unlock(struct afu_mf_rawdev *dev)
+{
+	if (dev && dev->shared)
+		__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE);
+}
+
+/*
+ * rawdev dev_configure callback: hand the configuration blob to the
+ * AFU-specific config op.
+ *
+ * Returns -ENODEV when the raw device has no private data, 0 when the AFU
+ * has no config op, and the op's result otherwise.
+ */
+static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
+	rte_rawdev_obj_t config, size_t config_size)
+{
+	struct afu_mf_rawdev *afu = NULL;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	if (afu->ops == NULL || afu->ops->config == NULL)
+		return 0;
+
+	return afu->ops->config(afu, config, config_size);
+}
+
+/*
+ * rawdev dev_start callback: run the AFU-specific start op while holding
+ * the device lock.
+ *
+ * Returns -ENODEV when the raw device has no private data, the
+ * afu_mf_trylock() error when the lock could not be taken (logged as busy),
+ * 0 when the AFU has no start op, and the op's result otherwise.
+ */
+static int afu_mf_rawdev_start(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu = NULL;
+	int status = 0;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	status = afu_mf_trylock(afu);
+	if (status != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please start it later");
+		return status;
+	}
+
+	if (afu->ops != NULL && afu->ops->start != NULL)
+		status = afu->ops->start(afu);
+
+	afu_mf_unlock(afu);
+
+	return status;
+}
+
+/*
+ * rawdev dev_stop callback: run the AFU-specific stop op while holding the
+ * device lock. The callback returns nothing, so the op's result is dropped;
+ * a failure to take the lock is only logged.
+ */
+static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu = NULL;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return;
+
+	if (afu_mf_trylock(afu) != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please stop it later");
+		return;
+	}
+
+	if (afu->ops != NULL && afu->ops->stop != NULL)
+		(void)afu->ops->stop(afu);
+
+	afu_mf_unlock(afu);
+}
+
+/*
+ * rawdev dev_close callback: run the AFU-specific close op. The device lock
+ * is not taken here.
+ *
+ * Returns -ENODEV when the raw device has no private data, 0 when the AFU
+ * has no close op, and the op's result otherwise.
+ */
+static int afu_mf_rawdev_close(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu = NULL;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	if (afu->ops == NULL || afu->ops->close == NULL)
+		return 0;
+
+	return afu->ops->close(afu);
+}
+
+/*
+ * rawdev dev_reset callback: run the AFU-specific reset op while holding
+ * the device lock.
+ *
+ * Returns -ENODEV when the raw device has no private data, the
+ * afu_mf_trylock() error when the lock could not be taken (logged as busy),
+ * 0 when the AFU has no reset op, and the op's result otherwise.
+ */
+static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu = NULL;
+	int status = 0;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	status = afu_mf_trylock(afu);
+	if (status != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please reset it later");
+		return status;
+	}
+
+	if (afu->ops != NULL && afu->ops->reset != NULL)
+		status = afu->ops->reset(afu);
+
+	afu_mf_unlock(afu);
+
+	return status;
+}
+
+/*
+ * rawdev dev_selftest callback: run the AFU-specific test op of raw device
+ * dev_id while holding the device lock.
+ *
+ * Returns -ENODEV for an invalid dev_id, -ENOENT when the raw device has no
+ * private data, the afu_mf_trylock() error when the lock could not be taken
+ * (logged as busy), 0 when the AFU has no test op, and the op's result
+ * otherwise.
+ */
+static int afu_mf_rawdev_selftest(uint16_t dev_id)
+{
+	struct afu_mf_rawdev *afu = NULL;
+	int status = 0;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	if (!rte_rawdev_pmd_is_valid_dev(dev_id))
+		return -ENODEV;
+
+	afu = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
+	if (afu == NULL)
+		return -ENOENT;
+
+	status = afu_mf_trylock(afu);
+	if (status != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please test it later");
+		return status;
+	}
+
+	if (afu->ops != NULL && afu->ops->test != NULL)
+		status = afu->ops->test(afu);
+
+	afu_mf_unlock(afu);
+
+	return status;
+}
+
+/*
+ * rawdev dump callback: let the AFU-specific dump op write its state to f.
+ *
+ * Returns -ENODEV when the raw device has no private data, 0 when the AFU
+ * has no dump op, and the op's result otherwise.
+ */
+static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f)
+{
+	struct afu_mf_rawdev *afu = NULL;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	if (afu->ops == NULL || afu->ops->dump == NULL)
+		return 0;
+
+	return afu->ops->dump(afu, f);
+}
+
+/*
+ * rawdev operations of the AFU raw device. Only configure, start, stop,
+ * close, reset, dump and selftest are implemented; every other callback is
+ * left NULL.
+ */
+static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
+	.dev_info_get = NULL,
+	.dev_configure = afu_mf_rawdev_configure,
+	.dev_start = afu_mf_rawdev_start,
+	.dev_stop = afu_mf_rawdev_stop,
+	.dev_close = afu_mf_rawdev_close,
+	.dev_reset = afu_mf_rawdev_reset,
+
+	.queue_def_conf = NULL,
+	.queue_setup = NULL,
+	.queue_release = NULL,
+	.queue_count = NULL,
+
+	.attr_get = NULL,
+	.attr_set = NULL,
+
+	.enqueue_bufs = NULL,
+	.dequeue_bufs = NULL,
+
+	.dump = afu_mf_rawdev_dump,
+
+	.xstats_get = NULL,
+	.xstats_get_names = NULL,
+	.xstats_get_by_name = NULL,
+	.xstats_reset = NULL,
+
+	.firmware_status_get = NULL,
+	.firmware_version_get = NULL,
+	.firmware_load = NULL,
+	.firmware_unload = NULL,
+
+	.dev_selftest = afu_mf_rawdev_selftest,
+};
+
+/*
+ * Look up the memzone named after the device, reserving it on first use,
+ * and hand back the shared data stored in it.
+ *
+ * name:      memzone name (the raw device name)
+ * data:      receives the pointer to the shared data
+ * socket_id: NUMA socket passed to rte_memzone_reserve()
+ *
+ * Returns 0 on success, -EINVAL on a NULL argument, -ENAMETOOLONG when the
+ * name does not fit in a memzone name, and -ENOMEM when the memzone could
+ * neither be found nor reserved.
+ */
+static int
+afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
+	int socket_id)
+{
+	const struct rte_memzone *mz;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	struct afu_mf_shared *ptr = NULL;
+	int init_mz = 0;
+	int n = 0;
+
+	if (!name || !data)
+		return -EINVAL;
+
+	/*
+	 * name format is afu_?|??:??.? which is unique; refuse a name that
+	 * would be truncated, since a truncated name is no longer guaranteed
+	 * to be unique and two devices could end up sharing one lock.
+	 */
+	n = snprintf(mz_name, sizeof(mz_name), "%s", name);
+	if (n < 0 || n >= (int)sizeof(mz_name)) {
+		AFU_MF_PMD_ERR("Name of memory zone %s is too long!", name);
+		return -ENAMETOOLONG;
+	}
+
+	mz = rte_memzone_lookup(mz_name);
+	if (!mz) {
+		mz = rte_memzone_reserve(mz_name,
+				sizeof(struct afu_mf_shared),
+				socket_id, 0);
+		init_mz = 1;
+	}
+
+	if (!mz) {
+		AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
+			mz_name);
+		return -ENOMEM;
+	}
+
+	ptr = (struct afu_mf_shared *)mz->addr;
+
+	if (init_mz)  /* initialize memory zone on the first time */
+		ptr->lock = 0;
+
+	*data = ptr;
+
+	return 0;
+}
+
+/*
+ * Build the raw device name "afu_<AFU device name>" into name, which has
+ * room for size bytes.
+ *
+ * Returns 0 on success, -EINVAL on a NULL argument or zero size, and
+ * -ENAMETOOLONG when the name does not fit.
+ */
+static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev, char *name,
+	size_t size)
+{
+	int written = 0;
+
+	if (afu_dev == NULL || name == NULL || size == 0)
+		return -EINVAL;
+
+	written = snprintf(name, size, "afu_%s", afu_dev->device.name);
+	if (written < (int)size)
+		return 0;
+
+	AFU_MF_PMD_ERR("Name of AFU device is too long!");
+	return -ENAMETOOLONG;
+}
+
+/*
+ * Find the ops of the driver in afu_table whose UUID equals afu_id.
+ *
+ * Returns the matching driver's ops, or NULL when afu_id is NULL or no
+ * driver matches.
+ */
+static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id)
+{
+	struct afu_mf_drv **slot = NULL;
+
+	if (afu_id == NULL)
+		return NULL;
+
+	for (slot = afu_table; *slot != NULL; slot++) {
+		const struct rte_afu_uuid *uuid = &(*slot)->uuid;
+
+		if (uuid->uuid_low == afu_id->uuid_low &&
+			uuid->uuid_high == afu_id->uuid_high)
+			return (*slot)->ops;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create and initialize the raw device that represents an AFU.
+ *
+ * The raw device is named "afu_<AFU device name>", bound to the per-AFU ops
+ * matching the AFU's UUID, initialized through the ops' init hook (if any)
+ * and given the shared data that holds the device lock.
+ *
+ * afu_dev:   AFU device being probed
+ * socket_id: NUMA socket used for the raw device and the shared data
+ *
+ * Returns 0 on success or a negative errno value. On any failure after the
+ * raw device was allocated, the raw device is released again.
+ */
+static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int socket_id)
+{
+	struct rte_rawdev *rawdev = NULL;
+	struct afu_mf_rawdev *dev = NULL;
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	int ret = 0;
+
+	if (!afu_dev)
+		return -EINVAL;
+
+	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (ret)
+		return ret;
+
+	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
+		name, socket_id);
+
+	/* Allocate device structure */
+	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct afu_mf_rawdev),
+				socket_id);
+	if (!rawdev) {
+		AFU_MF_PMD_ERR("Unable to allocate raw device");
+		return -ENOMEM;
+	}
+
+	rawdev->dev_ops = &afu_mf_rawdev_ops;
+	rawdev->device = &afu_dev->device;
+	rawdev->driver_name = afu_dev->driver->driver.name;
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (!dev) {
+		/* ret is still 0 here; report the failure to the caller */
+		ret = -ENODEV;
+		goto cleanup;
+	}
+
+	dev->rawdev = rawdev;
+	dev->port = afu_dev->id.port;
+	dev->addr = afu_dev->mem_resource[0].addr;
+	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
+	if (dev->ops == NULL) {
+		AFU_MF_PMD_ERR("Unsupported AFU device");
+		/* ret is still 0 here; report the failure to the caller */
+		ret = -ENOENT;
+		goto cleanup;
+	}
+
+	if (dev->ops->init) {
+		ret = (*dev->ops->init)(dev);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed to init %s", name);
+			goto cleanup;
+		}
+	}
+
+	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
+	if (ret)
+		goto cleanup;
+
+	return ret;
+
+cleanup:
+	rte_rawdev_pmd_release(rawdev);
+	return ret;
+}
+
+/*
+ * Release the raw device that was created for afu_dev.
+ *
+ * Returns -EINVAL when afu_dev is NULL or no raw device carries the derived
+ * name, the afu_mf_rawdev_name_get() error when the name cannot be built,
+ * and 0 otherwise. A failing release is only logged at debug level.
+ */
+static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev)
+{
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	struct rte_rawdev *victim = NULL;
+	int status = 0;
+
+	if (afu_dev == NULL)
+		return -EINVAL;
+
+	status = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (status != 0)
+		return status;
+
+	AFU_MF_PMD_INFO("Destroy raw device %s", name);
+
+	victim = rte_rawdev_pmd_get_named_dev(name);
+	if (victim == NULL) {
+		AFU_MF_PMD_ERR("Raw device %s not found", name);
+		return -EINVAL;
+	}
+
+	/* rte_rawdev_close is called by pmd_release */
+	if (rte_rawdev_pmd_release(victim) != 0)
+		AFU_MF_PMD_DEBUG("Device cleanup failed");
+
+	return 0;
+}
+
+/*
+ * AFU driver probe hook: create the raw device for afu_dev on the socket
+ * reported by rte_socket_id().
+ */
+static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev)
+{
+	int socket_id = 0;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	socket_id = rte_socket_id();
+	return afu_mf_rawdev_create(afu_dev, socket_id);
+}
+
+/* AFU driver remove hook: destroy the raw device created for afu_dev. */
+static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev)
+{
+	int status = 0;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	status = afu_mf_rawdev_destroy(afu_dev);
+	return status;
+}
+
+/*
+ * AFU bus driver: matches the UUIDs in afu_uuid_map and creates/destroys
+ * the raw device on probe/remove.
+ */
+static struct rte_afu_driver afu_mf_pmd_drv = {
+	.id_table = afu_uuid_map,
+	.probe = afu_mf_rawdev_probe,
+	.remove = afu_mf_rawdev_remove
+};
+
+/* Register the driver and its log type (default level NOTICE). */
+RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME, afu_mf_pmd_drv);
+RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);
diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h b/drivers/raw/afu_mf/afu_mf_rawdev.h
new file mode 100644
index 0000000..5690010
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __AFU_MF_RAWDEV_H__
+#define __AFU_MF_RAWDEV_H__
+
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+/* Log type of this driver, used by all AFU_MF_PMD_* logging macros. */
+extern int afu_mf_pmd_logtype;
+
+/*
+ * Log through rte_log() at the given level with the driver's log type. The
+ * message is prefixed with the calling function's name and a newline is
+ * appended, so callers pass the format without a trailing "\n".
+ */
+#define AFU_MF_PMD_LOG(level, fmt, args...) \
+	rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n", \
+		__func__, ##args)
+
+/* Debug-level marker logged on function entry. */
+#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG, ">>")
+
+/* Per-level shorthands for AFU_MF_PMD_LOG(). */
+#define AFU_MF_PMD_DEBUG(fmt, args...) \
+	AFU_MF_PMD_LOG(DEBUG, fmt, ## args)
+#define AFU_MF_PMD_INFO(fmt, args...) \
+	AFU_MF_PMD_LOG(INFO, fmt, ## args)
+#define AFU_MF_PMD_ERR(fmt, args...) \
+	AFU_MF_PMD_LOG(ERR, fmt, ## args)
+#define AFU_MF_PMD_WARN(fmt, args...) \
+	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
+
+/* n * 64: size in bytes of n cache lines */
+#define CACHE_LINE_SIZE(n)  ((n) << 6)
+/* n / 64: byte address or size expressed in cache-line units */
+#define CACHE_LINE_ALIGNED(n)  ((n) >> 6)
+/* f * 1000000: frequency in MHz expressed in Hz */
+#define MHZ(f)  ((f) * 1000000)
+
+/*
+ * Poll a memory location until a condition holds or a timeout expires.
+ *
+ * addr:    pointer that is dereferenced on every poll
+ * val:     lvalue receiving the value read from *addr
+ * cond:    expression (normally written in terms of val) that ends the
+ *          poll; it is evaluated after every read and once more at the end
+ * invl:    delay between polls, handed to rte_delay_ms()
+ * timeout: upper bound for the accumulated delay, in the same unit as invl
+ *
+ * Evaluates to 0 when cond holds and to 1 when the timeout expired. Uses a
+ * GCC statement expression.
+ */
+#define dsm_poll_timeout(addr, val, cond, invl, timeout) \
+({                                                       \
+	uint64_t __wait = 0;                                 \
+	uint64_t __invl = (invl);                            \
+	uint64_t __timeout = (timeout);                      \
+	for (; __wait <= __timeout; __wait += __invl) {      \
+		(val) = *(addr);                                 \
+		if (cond)                                        \
+			break;                                       \
+		rte_delay_ms(__invl);                            \
+	}                                                    \
+	(cond) ? 0 : 1;                                      \
+})
+
+struct afu_mf_rawdev;
+
+/*
+ * Operations an AFU-specific driver provides. Any hook may be NULL; the
+ * rawdev callbacks in afu_mf_rawdev.c skip hooks that are not set.
+ */
+struct afu_mf_ops {
+	int (*init)(struct afu_mf_rawdev *dev);  /* called at device creation */
+	int (*config)(struct afu_mf_rawdev *dev, void *config,
+		size_t config_size);  /* called from dev_configure */
+	int (*start)(struct afu_mf_rawdev *dev);  /* called from dev_start */
+	int (*stop)(struct afu_mf_rawdev *dev);  /* called from dev_stop */
+	int (*test)(struct afu_mf_rawdev *dev);  /* called from dev_selftest */
+	int (*close)(struct afu_mf_rawdev *dev);  /* called from dev_close */
+	int (*reset)(struct afu_mf_rawdev *dev);  /* called from dev_reset */
+	int (*dump)(struct afu_mf_rawdev *dev, FILE *f);  /* called from dump */
+};
+
+/* Binds the UUID of an AFU to the operations that drive it. */
+struct afu_mf_drv {
+	struct rte_afu_uuid uuid;
+	struct afu_mf_ops *ops;
+};
+
+/* Per-device data kept in a memzone named after the raw device. */
+struct afu_mf_shared {
+	int32_t lock;  /* 0 = free, 1 = held; accessed with __atomic builtins */
+};
+
+/* Private data of an AFU raw device (rte_rawdev::dev_private). */
+struct afu_mf_rawdev {
+	struct rte_rawdev *rawdev;  /* point to parent raw device */
+	struct afu_mf_shared *shared;  /* shared data for multi-process */
+	struct afu_mf_ops *ops;  /* device operation functions */
+	int port;  /* index of port the AFU attached */
+	void *addr;  /* base address of AFU registers */
+	void *priv;  /* private driver data */
+};
+
+/*
+ * Return the AFU private data attached to rawdev, or NULL when rawdev is
+ * NULL.
+ */
+static inline struct afu_mf_rawdev *
+afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev)
+{
+	if (rawdev == NULL)
+		return NULL;
+
+	return (struct afu_mf_rawdev *)rawdev->dev_private;
+}
+
+#endif /* __AFU_MF_RAWDEV_H__ */
diff --git a/drivers/raw/afu_mf/he_hssi.c b/drivers/raw/afu_mf/he_hssi.c
new file mode 100644
index 0000000..68d8dba
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_hssi.h"
+
+/*
+ * Write value to the indirect register addr through the traffic control
+ * mailbox (TRAFFIC_CTRL_DATA / TRAFFIC_CTRL_CMD).
+ *
+ * The data register is loaded first, then the write command is issued and
+ * the command register is polled until ack_trans is set. Afterwards
+ * ack_trans is written back and polled until it reads as clear again.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL, and -ETIME when either
+ * poll does not finish within MAILBOX_TIMEOUT_MS.
+ */
+static int he_hssi_indirect_write(struct he_hssi_ctx *ctx, uint32_t addr,
+	uint32_t value)
+{
+	struct traffic_ctrl_cmd cmd;
+	struct traffic_ctrl_data data;
+	uint32_t i = 0;
+
+	AFU_MF_PMD_DEBUG("Indirect write 0x%x, value 0x%08x", addr, value);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/*
+	 * Clear the whole register image first: only write_data is set below
+	 * and the read_data half would otherwise be written uninitialized.
+	 */
+	data.csr = 0;
+	data.write_data = value;
+	rte_write64(data.csr, ctx->addr + TRAFFIC_CTRL_DATA);
+
+	cmd.csr = 0;
+	cmd.write_cmd = 1;
+	cmd.afu_cmd_addr = addr;
+	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+	/* wait for the command to be acknowledged */
+	while (i < MAILBOX_TIMEOUT_MS) {
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIME;
+
+	/* write ack_trans back until it reads as clear */
+	i = 0;
+	cmd.csr = 0;
+	while (i < MAILBOX_TIMEOUT_MS) {
+		cmd.ack_trans = 1;
+		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (!cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIME;
+
+	return 0;
+}
+
+/*
+ * Read the indirect register addr through the traffic control mailbox and
+ * store the result in *value.
+ *
+ * The read command is issued and the command register is polled until
+ * ack_trans is set, at which point the data register is read. Afterwards
+ * ack_trans is written back and polled until it reads as clear again.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL, and -ETIME when either
+ * poll does not finish within MAILBOX_TIMEOUT_MS. *value is written only
+ * once the command was acknowledged.
+ */
+static int he_hssi_indirect_read(struct he_hssi_ctx *ctx, uint32_t addr,
+	uint32_t *value)
+{
+	struct traffic_ctrl_cmd cmd;
+	struct traffic_ctrl_data data;
+	uint32_t i = 0;
+
+	if (!ctx)
+		return -EINVAL;
+
+	cmd.csr = 0;
+	cmd.read_cmd = 1;
+	cmd.afu_cmd_addr = addr;
+	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+	/* wait for the acknowledge, then fetch the data */
+	while (i < MAILBOX_TIMEOUT_MS) {
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (cmd.ack_trans) {
+			data.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_DATA);
+			*value = data.read_data;
+			break;
+		}
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIME;
+
+	/* write ack_trans back until it reads as clear */
+	i = 0;
+	cmd.csr = 0;
+	while (i < MAILBOX_TIMEOUT_MS) {
+		cmd.ack_trans = 1;
+		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (!cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIME;
+
+	AFU_MF_PMD_DEBUG("Indirect read 0x%x, value 0x%08x", addr, *value);
+	return 0;
+}
+
+/*
+ * Print the receive counters, the AVST rx error flags and the loopback
+ * FIFO status to stdout. The report stops silently at the first indirect
+ * read that fails.
+ */
+static void he_hssi_report(struct he_hssi_ctx *ctx)
+{
+	/* rx error flags in the order they are printed */
+	static const struct {
+		uint32_t mask;
+		const char *text;
+	} rx_errors[] = {
+		{ OVERFLOW_ERR, " overflow" },
+		{ LENGTH_ERR, " length" },
+		{ OVERSIZE_ERR, " oversize" },
+		{ UNDERSIZE_ERR, " undersize" },
+		{ MAC_CRC_ERR, " crc" },
+		{ PHY_ERR, " phy" },
+	};
+	uint32_t reg = 0;
+	uint32_t bytes_hi = 0;
+	unsigned int k = 0;
+
+	if (he_hssi_indirect_read(ctx, TM_PKT_GOOD, &reg))
+		return;
+	printf("Number of good packets received: %u\n", reg);
+
+	if (he_hssi_indirect_read(ctx, TM_PKT_BAD, &reg))
+		return;
+	printf("Number of bad packets received: %u\n", reg);
+
+	/* the byte counter is split over two registers, high half first */
+	if (he_hssi_indirect_read(ctx, TM_BYTE_CNT1, &bytes_hi))
+		return;
+	if (he_hssi_indirect_read(ctx, TM_BYTE_CNT0, &reg))
+		return;
+	printf("Number of bytes received: %"PRIu64"\n",
+		((uint64_t)bytes_hi << 32) | reg);
+
+	if (he_hssi_indirect_read(ctx, TM_AVST_RX_ERR, &reg))
+		return;
+	if (reg & ERR_VALID) {
+		printf("AVST rx error:");
+		for (k = 0; k < sizeof(rx_errors) / sizeof(rx_errors[0]); k++) {
+			if (reg & rx_errors[k].mask)
+				printf("%s", rx_errors[k].text);
+		}
+		printf("\n");
+	}
+
+	if (he_hssi_indirect_read(ctx, LOOPBACK_FIFO_STATUS, &reg))
+		return;
+	if ((reg & (ALMOST_EMPTY | ALMOST_FULL)) == 0)
+		return;
+
+	printf("FIFO status:");
+	if (reg & ALMOST_EMPTY)
+		printf(" almost empty");
+	if (reg & ALMOST_FULL)
+		printf(" almost full");
+	printf("\n");
+}
+
+/*
+ * Run the HE-HSSI test described by the stored configuration.
+ *
+ * Any running transfer is stopped and the configured port is selected. When
+ * cfg->he_loopback is not negative, only the loopback enable is written and
+ * the function returns. Otherwise the traffic generator is programmed
+ * (packet count and length, MAC addresses, length/payload type, random
+ * seeds), the transfer is started, and the transferred-packet counter is
+ * polled once per second for up to cfg->timeout polls before the report is
+ * printed.
+ *
+ * Returns 0 on success (including when the poll loop runs out of polls),
+ * -EINVAL when dev is NULL, -ENOENT when the device has no private data, or
+ * the error of the failing indirect access.
+ */
+static int he_hssi_test(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
+	struct he_hssi_ctx *ctx = NULL;
+	struct traffic_ctrl_ch_sel sel;
+	uint32_t val = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_hssi_cfg;
+	ctx = &priv->he_hssi_ctx;
+
+	ret = he_hssi_indirect_write(ctx, TG_STOP_XFR, 0);
+	if (ret)
+		return ret;
+
+	/* clear the register image so the reserved bits are written as 0 */
+	sel.csr = 0;
+	sel.channel_sel = cfg->port;
+	rte_write64(sel.csr, ctx->addr + TRAFFIC_CTRL_CH_SEL);
+
+	if (cfg->he_loopback >= 0) {
+		val = cfg->he_loopback ? 1 : 0;
+		AFU_MF_PMD_INFO("%s HE loopback on port %u",
+			val ? "Enable" : "Disable", cfg->port);
+		return he_hssi_indirect_write(ctx, LOOPBACK_EN, val);
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_NUM_PKT, cfg->num_packets);
+	if (ret)
+		return ret;
+
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN, cfg->packet_length);
+	if (ret)
+		return ret;
+
+	/* MAC addresses are split into a 32-bit low and a 16-bit high part */
+	val = cfg->src_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->src_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->dest_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->dest_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_length ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN_TYPE, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_payload ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_DATA_PATTERN, val);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < 3; i++) {
+		ret = he_hssi_indirect_write(ctx, TG_RANDOM_SEED(i),
+			cfg->rnd_seed[i]);
+		if (ret)
+			return ret;
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_START_XFR, 1);
+	if (ret)
+		return ret;
+
+	/*
+	 * Restart the counter: it still holds the seed loop's final value and
+	 * would otherwise cut the configured timeout short.
+	 */
+	i = 0;
+	while (i++ < cfg->timeout) {
+		ret = he_hssi_indirect_read(ctx, TG_PKT_XFRD, &val);
+		if (ret)
+			break;
+		if (val == cfg->num_packets)
+			break;
+		sleep(1);
+	}
+
+	he_hssi_report(ctx);
+
+	return ret;
+}
+
+/*
+ * Init hook: allocate the zeroed HE-HSSI private data if the device has
+ * none yet and record the AFU register base in its context.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, and -ENOMEM when the
+ * allocation fails.
+ */
+static int he_hssi_init(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *hssi = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->priv == NULL) {
+		dev->priv = rte_zmalloc(NULL, sizeof(struct he_hssi_priv), 0);
+		if (dev->priv == NULL)
+			return -ENOMEM;
+	}
+
+	hssi = (struct he_hssi_priv *)dev->priv;
+	hssi->he_hssi_ctx.addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+/*
+ * Config hook: validate a struct rte_pmd_afu_he_hssi_cfg and copy it into
+ * the private data for later use by the test hook.
+ *
+ * Returns 0 on success, -EINVAL on a NULL/empty argument, a config_size
+ * other than the structure size, or a port of NUM_HE_HSSI_PORTS or more,
+ * and -ENOENT when the device has no private data.
+ */
+static int he_hssi_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_hssi_priv *hssi = NULL;
+	struct rte_pmd_afu_he_hssi_cfg *req = NULL;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	hssi = (struct he_hssi_priv *)dev->priv;
+	if (hssi == NULL)
+		return -ENOENT;
+
+	req = (struct rte_pmd_afu_he_hssi_cfg *)config;
+	if (config_size != sizeof(struct rte_pmd_afu_he_hssi_cfg) ||
+		req->port >= NUM_HE_HSSI_PORTS)
+		return -EINVAL;
+
+	rte_memcpy(&hssi->he_hssi_cfg, req, sizeof(hssi->he_hssi_cfg));
+
+	return 0;
+}
+
+/*
+ * Close hook: free the private data. Returns 0, or -EINVAL when dev is
+ * NULL.
+ */
+static int he_hssi_close(struct afu_mf_rawdev *dev)
+{
+	if (dev != NULL) {
+		rte_free(dev->priv);
+		dev->priv = NULL;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Dump hook: print the AFU register base to f, or to stdout when f is
+ * NULL.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, and -ENOENT when the
+ * device has no private data.
+ */
+static int he_hssi_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_hssi_priv *hssi = NULL;
+	FILE *out = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	hssi = (struct he_hssi_priv *)dev->priv;
+	if (hssi == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+
+	fprintf(out, "addr:\t\t%p\n", (void *)hssi->he_hssi_ctx.addr);
+
+	return 0;
+}
+
+/* HE-HSSI operations; start, stop and reset are not implemented. */
+static struct afu_mf_ops he_hssi_ops = {
+	.init = he_hssi_init,
+	.config = he_hssi_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_hssi_test,
+	.close = he_hssi_close,
+	.dump = he_hssi_dump,
+	.reset = NULL
+};
+
+/* Driver entry binding the HE-HSSI UUID to he_hssi_ops. */
+struct afu_mf_drv he_hssi_drv = {
+	.uuid = { HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+	.ops = &he_hssi_ops
+};
diff --git a/drivers/raw/afu_mf/he_hssi.h b/drivers/raw/afu_mf/he_hssi.h
new file mode 100644
index 0000000..f8b9623
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_HSSI_H_
+#define _HE_HSSI_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/* Low and high 64 bits of the HE-HSSI AFU UUID */
+#define HE_HSSI_UUID_L    0xbb370242ac130002
+#define HE_HSSI_UUID_H    0x823c334c98bf11ea
+/* Ports accepted by he_hssi_config() are 0 .. NUM_HE_HSSI_PORTS - 1 */
+#define NUM_HE_HSSI_PORTS 8
+
+extern struct afu_mf_drv he_hssi_drv;
+
+/* HE-HSSI registers definition */
+/* Byte offsets from the AFU register base (he_hssi_ctx::addr) */
+#define TRAFFIC_CTRL_CMD    0x30
+#define TRAFFIC_CTRL_DATA   0x38
+#define TRAFFIC_CTRL_CH_SEL 0x40
+#define AFU_SCRATCHPAD      0x48
+
+/*
+ * Indirect register addresses, accessed through he_hssi_indirect_read()
+ * and he_hssi_indirect_write(). TG_* presumably belong to the traffic
+ * generator and TM_* to the traffic monitor -- confirm against the AFU
+ * specification.
+ */
+#define TG_NUM_PKT        0x3c00
+#define TG_PKT_LEN_TYPE   0x3c01
+#define TG_DATA_PATTERN   0x3c02
+#define TG_START_XFR      0x3c03
+#define TG_STOP_XFR       0x3c04
+#define TG_SRC_MAC_L      0x3c05
+#define TG_SRC_MAC_H      0x3c06
+#define TG_DST_MAC_L      0x3c07
+#define TG_DST_MAC_H      0x3c08
+#define TG_PKT_XFRD       0x3c09
+#define TG_RANDOM_SEED(n) (0x3c0a + (n))
+#define TG_PKT_LEN        0x3c0d
+
+#define TM_NUM_PKT        0x3d00
+#define TM_PKT_GOOD       0x3d01
+#define TM_PKT_BAD        0x3d02
+#define TM_BYTE_CNT0      0x3d03
+#define TM_BYTE_CNT1      0x3d04
+#define TM_AVST_RX_ERR    0x3d07
+/* Bits of TM_AVST_RX_ERR; the error bits are reported only with ERR_VALID */
+#define   OVERFLOW_ERR    (1 << 9)
+#define   LENGTH_ERR      (1 << 8)
+#define   OVERSIZE_ERR    (1 << 7)
+#define   UNDERSIZE_ERR   (1 << 6)
+#define   MAC_CRC_ERR     (1 << 5)
+#define   PHY_ERR         (1 << 4)
+#define   ERR_VALID       (1 << 3)
+
+#define LOOPBACK_EN          0x3e00
+#define LOOPBACK_FIFO_STATUS 0x3e01
+/* Bits of LOOPBACK_FIFO_STATUS */
+#define   ALMOST_EMPTY    (1 << 1)
+#define   ALMOST_FULL     (1 << 0)
+
+/* Timing of the mailbox polls in the indirect access helpers */
+#define MAILBOX_TIMEOUT_MS       100
+#define MAILBOX_POLL_INTERVAL_MS 10
+
+/*
+ * Image of the TRAFFIC_CTRL_CMD register: csr is the raw 64-bit value
+ * read from / written to the register, the bit-fields overlay it.
+ */
+struct traffic_ctrl_cmd {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t read_cmd:1;  /* set to issue an indirect read */
+			uint32_t write_cmd:1;  /* set to issue an indirect write */
+			uint32_t ack_trans:1;  /* polled for command completion */
+			uint32_t rsvd1:29;
+			uint32_t afu_cmd_addr:16;  /* indirect register address */
+			uint32_t rsvd2:16;
+		};
+	};
+};
+
+/*
+ * Image of the TRAFFIC_CTRL_DATA register: csr is the raw 64-bit value,
+ * split into the data returned by a read and the data to be written.
+ */
+struct traffic_ctrl_data {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t read_data;
+			uint32_t write_data;
+		};
+	};
+};
+
+/*
+ * Image of the TRAFFIC_CTRL_CH_SEL register: csr is the raw 64-bit value,
+ * channel_sel carries the port number being selected.
+ */
+struct traffic_ctrl_ch_sel {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t channel_sel:3;
+			uint32_t rsvd1:29;
+			uint32_t rsvd2;
+		};
+	};
+};
+
+/* Runtime context of an HE-HSSI AFU. */
+struct he_hssi_ctx {
+	uint8_t *addr;  /* base address of the AFU registers */
+};
+
+/* Private data of an HE-HSSI device: stored configuration plus context. */
+struct he_hssi_priv {
+	struct rte_pmd_afu_he_hssi_cfg he_hssi_cfg;
+	struct he_hssi_ctx he_hssi_ctx;
+};
+
+#endif /* _HE_HSSI_H_ */
diff --git a/drivers/raw/afu_mf/he_lbk.c b/drivers/raw/afu_mf/he_lbk.c
new file mode 100644
index 0000000..d47ddde
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.c
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_lbk.h"
+
+/*
+ * Translate the stored configuration into the CSR_CFG register value and
+ * write it to the AFU.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, and -ENOENT when the
+ * device has no private data.
+ */
+static int he_lbk_afu_config(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *lbk = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *user = NULL;
+	struct he_lbk_csr_cfg reg;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	lbk = (struct he_lbk_priv *)dev->priv;
+	user = &lbk->he_lbk_cfg;
+
+	reg.csr = 0;
+	reg.cont = user->cont ? 1 : 0;
+	reg.mode = user->mode;
+	reg.trput_interleave = user->trput_interleave;
+	/* 4 cache lines are encoded as 2, other counts as count - 1 */
+	reg.multicl_len = (user->multi_cl == 4) ? 2 : user->multi_cl - 1;
+
+	AFU_MF_PMD_DEBUG("cfg: 0x%08x", reg.csr);
+	rte_write32(reg.csr, lbk->he_lbk_ctx.addr + CSR_CFG);
+
+	return 0;
+}
+
+/*
+ * Print the result of one test run to stdout: the read/write counters, the
+ * pending counters, the clock ticks used and the bandwidths derived from
+ * them.
+ *
+ * dev: device under test; nothing is printed when it or its private data
+ *      is NULL
+ * cl:  number of cache lines of the run, printed in the first column
+ *
+ * When the configuration carries no AFU clock frequency, it is read from
+ * CSR_HE_INFO0 (350 MHz is assumed when that is 0 as well) and stored back
+ * into the configuration.
+ */
+static void he_lbk_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	struct he_lbk_dsm_status *stat = NULL;
+	struct he_lbk_status0 stat0;
+	struct he_lbk_status1 stat1;
+	uint64_t swtest_msg = 0;
+	uint64_t ticks = 0;
+	uint64_t info = 0;
+	double num, rd_bw, wr_bw;
+
+	if (!dev || !dev->priv)
+		return;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+
+	stat = ctx->status_ptr;
+
+	swtest_msg = rte_read64(ctx->addr + CSR_SWTEST_MSG);
+	stat0.csr = rte_read64(ctx->addr + CSR_STATUS0);
+	stat1.csr = rte_read64(ctx->addr + CSR_STATUS1);
+
+	/* the end overhead is only subtracted for non-continuous runs */
+	if (cfg->cont)
+		ticks = stat->num_clocks - stat->start_overhead;
+	else
+		ticks = stat->num_clocks -
+			(stat->start_overhead + stat->end_overhead);
+
+	if (cfg->freq_mhz == 0) {
+		info = rte_read64(ctx->addr + CSR_HE_INFO0);
+		AFU_MF_PMD_INFO("API version: %"PRIx64, info >> 16);
+		cfg->freq_mhz = info & 0xffff;
+		if (cfg->freq_mhz == 0) {
+			AFU_MF_PMD_INFO("Frequency of AFU clock is unknown."
+				" Assuming 350 MHz.");
+			cfg->freq_mhz = 350;
+		}
+	}
+
+	/* bytes moved * clock rate / ticks = bytes per second */
+	num = (double)stat0.num_reads;
+	rd_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+	num = (double)stat0.num_writes;
+	wr_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+
+	printf("Cachelines  Read_Count Write_Count Pend_Read Pend_Write "
+		"Clocks@%uMHz   Rd_Bandwidth   Wr_Bandwidth\n",
+		cfg->freq_mhz);
+	/* ticks is a uint64_t, so it needs PRIu64 rather than %lu */
+	printf("%10u  %10u %10u %10u %10u  %12"PRIu64
+		"   %7.3f GB/s   %7.3f GB/s\n",
+		cl, stat0.num_reads, stat0.num_writes,
+		stat1.num_pend_reads, stat1.num_pend_writes,
+		ticks, rd_bw / 1e9, wr_bw / 1e9);
+	printf("Test Message: 0x%"PRIx64"\n", swtest_msg);
+}
+
+/*
+ * Run the test for every transfer size from cfg->begin to cfg->end cache
+ * lines, stepping by cfg->multi_cl, and print a report per size.
+ *
+ * For each size the destination and DSM buffers are cleared, the AFU is
+ * reset and started, completion is awaited through the DSM status, and in
+ * NLB_MODE_LPBK the destination buffer is compared with the source
+ * pattern.
+ *
+ * Returns 0 when every size completed and no mismatch was found, -EINVAL
+ * when dev is NULL or the stored configuration has a zero step, -ENOENT
+ * when the device has no private data, -ETIME when the AFU did not signal
+ * completion in time, -EIO when copied data did not match, or the error of
+ * he_lbk_afu_config().
+ */
+static int he_lbk_test(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	struct he_lbk_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int mismatch = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+
+	/*
+	 * A zero step (device never configured) would keep the loop over the
+	 * transfer sizes on the same size forever.
+	 */
+	if (cfg->multi_cl == 0)
+		return -EINVAL;
+
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	rte_delay_us(1000);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	/* initialize DMA addresses */
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->src_iova),
+		ctx->addr + CSR_SRC_ADDR);
+
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->dest_iova),
+		ctx->addr + CSR_DST_ADDR);
+
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	rte_write32(CACHE_LINE_ALIGNED(ctx->dsm_iova),
+		ctx->addr + CSR_AFU_DSM_BASEL);
+	rte_write32(CACHE_LINE_ALIGNED(ctx->dsm_iova) >> 32,
+		ctx->addr + CSR_AFU_DSM_BASEH);
+
+	ret = he_lbk_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CACHE_LINE_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		memset(ctx->dest_ptr, 0, CACHE_LINE_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		rte_delay_us(1000);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		rte_write32(cl - 1, ctx->addr + CSR_NUM_LINES);
+
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			/* continuous mode runs until completion is forced */
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				ret = -ETIME;
+				goto end;
+			}
+		} else {
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				ret = -ETIME;
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		he_lbk_report(dev, cl);
+
+		/* give CSR_STATUS1 up to 100 polls to read as 0 */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		if (cfg->mode == NLB_MODE_LPBK) {
+			ptr = (uint32_t *)ctx->dest_ptr;
+			j = CACHE_LINE_SIZE(cl) >> 2;
+			for (i = 0; i < j; i++) {
+				if (*ptr++ != i) {
+					AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+					/* keep testing, but fail the run */
+					mismatch = 1;
+					break;
+				}
+			}
+		}
+	}
+
+end:
+	if (ret == 0 && mismatch)
+		ret = -EIO;
+
+	return ret;
+}
+
+/*
+ * Free the DSM, source and destination buffers of the context and clear
+ * the pointers to them.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, and -ENOENT when the
+ * device has no private data.
+ */
+static int he_lbk_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_ctx *lbk_ctx = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	lbk_ctx = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+
+	/* status_ptr is cleared together with the DSM buffer */
+	rte_free(lbk_ctx->dsm_ptr);
+	lbk_ctx->dsm_ptr = NULL;
+	lbk_ctx->status_ptr = NULL;
+
+	rte_free(lbk_ctx->src_ptr);
+	lbk_ctx->src_ptr = NULL;
+
+	rte_free(lbk_ctx->dest_ptr);
+	lbk_ctx->dest_ptr = NULL;
+
+	return 0;
+}
+
+/*
+ * Set up the context: record the AFU register base and allocate the DSM,
+ * source and destination buffers together with their IO addresses.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data, and -ENOMEM when a buffer cannot be allocated or
+ * has no valid IO address; in the -ENOMEM case everything allocated so far
+ * is released again.
+ */
+static int he_lbk_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_ctx *lbk_ctx = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	lbk_ctx = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+	lbk_ctx->addr = (uint8_t *)dev->addr;
+
+	lbk_ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE,
+		TEST_MEM_ALIGN);
+	if (lbk_ctx->dsm_ptr == NULL)
+		goto release;
+	lbk_ctx->dsm_iova = rte_malloc_virt2iova(lbk_ctx->dsm_ptr);
+	if (lbk_ctx->dsm_iova == RTE_BAD_IOVA)
+		goto release;
+
+	lbk_ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (lbk_ctx->src_ptr == NULL)
+		goto release;
+	lbk_ctx->src_iova = rte_malloc_virt2iova(lbk_ctx->src_ptr);
+	if (lbk_ctx->src_iova == RTE_BAD_IOVA)
+		goto release;
+
+	lbk_ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (lbk_ctx->dest_ptr == NULL)
+		goto release;
+	lbk_ctx->dest_iova = rte_malloc_virt2iova(lbk_ctx->dest_ptr);
+	if (lbk_ctx->dest_iova == RTE_BAD_IOVA)
+		goto release;
+
+	/* the status block lives at the start of the DSM buffer */
+	lbk_ctx->status_ptr = (struct he_lbk_dsm_status *)lbk_ctx->dsm_ptr;
+	return 0;
+
+release:
+	/* every failure above is reported as out of memory */
+	he_lbk_ctx_release(dev);
+	return -ENOMEM;
+}
+
+/*
+ * Init hook: allocate the zeroed private data if the device has none yet,
+ * then set up the context.
+ *
+ * Returns -EINVAL when dev is NULL, -ENOMEM when the private data cannot
+ * be allocated, and the result of he_lbk_ctx_init() otherwise.
+ */
+static int he_lbk_init(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->priv == NULL)
+		dev->priv = rte_zmalloc(NULL, sizeof(struct he_lbk_priv), 0);
+
+	if (dev->priv == NULL)
+		return -ENOMEM;
+
+	return he_lbk_ctx_init(dev);
+}
+
+/*
+ * Config hook: validate a struct rte_pmd_afu_he_lbk_cfg and copy it into
+ * the private data for later use by the test hook.
+ *
+ * Accepted are a mode up to NLB_MODE_TRPUT, a multi_cl of 1, 2 or 4, and
+ * MIN_CACHE_LINES <= begin <= end <= MAX_CACHE_LINES.
+ *
+ * Returns 0 on success, -EINVAL on a NULL/empty argument, a config_size
+ * other than the structure size or a value outside the ranges above, and
+ * -ENOENT when the device has no private data.
+ */
+static int he_lbk_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_lbk_priv *lbk = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *req = NULL;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	lbk = (struct he_lbk_priv *)dev->priv;
+	if (lbk == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_lbk_cfg))
+		return -EINVAL;
+
+	req = (struct rte_pmd_afu_he_lbk_cfg *)config;
+	if (req->mode > NLB_MODE_TRPUT)
+		return -EINVAL;
+
+	switch (req->multi_cl) {
+	case 1:
+	case 2:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if ((req->begin < MIN_CACHE_LINES) || (req->begin > MAX_CACHE_LINES) ||
+		(req->end < req->begin) || (req->end > MAX_CACHE_LINES))
+		return -EINVAL;
+
+	rte_memcpy(&lbk->he_lbk_cfg, req, sizeof(lbk->he_lbk_cfg));
+
+	return 0;
+}
+
+/*
+ * Close hook: release the context buffers and free the private data.
+ * Returns 0, or -EINVAL when dev is NULL.
+ */
+static int he_lbk_close(struct afu_mf_rawdev *dev)
+{
+	if (dev != NULL) {
+		he_lbk_ctx_release(dev);
+
+		rte_free(dev->priv);
+		dev->priv = NULL;
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Dump hook: print the register base, the buffer pointers and their IO
+ * addresses to f, or to stdout when f is NULL.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, and -ENOENT when the
+ * device has no private data.
+ */
+static int he_lbk_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_lbk_ctx *lbk_ctx = NULL;
+	FILE *out = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+	lbk_ctx = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+
+	fprintf(out, "addr:\t\t%p\n", (void *)lbk_ctx->addr);
+	fprintf(out, "dsm_ptr:\t%p\n", (void *)lbk_ctx->dsm_ptr);
+	fprintf(out, "dsm_iova:\t%p\n", (void *)lbk_ctx->dsm_iova);
+	fprintf(out, "src_ptr:\t%p\n", (void *)lbk_ctx->src_ptr);
+	fprintf(out, "src_iova:\t%p\n", (void *)lbk_ctx->src_iova);
+	fprintf(out, "dest_ptr:\t%p\n", (void *)lbk_ctx->dest_ptr);
+	fprintf(out, "dest_iova:\t%p\n", (void *)lbk_ctx->dest_iova);
+	fprintf(out, "status_ptr:\t%p\n", (void *)lbk_ctx->status_ptr);
+
+	return 0;
+}
+
+/*
+ * Operations shared by the HE-LBK and HE-MEM-LBK drivers; start, stop and
+ * reset are not implemented.
+ */
+static struct afu_mf_ops he_lbk_ops = {
+	.init = he_lbk_init,
+	.config = he_lbk_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_lbk_test,
+	.close = he_lbk_close,
+	.dump = he_lbk_dump,
+	.reset = NULL
+};
+
+/* Driver entry binding the HE-LBK UUID to he_lbk_ops. */
+struct afu_mf_drv he_lbk_drv = {
+	.uuid = { HE_LBK_UUID_L, HE_LBK_UUID_H },
+	.ops = &he_lbk_ops
+};
+
+/* Driver entry binding the HE-MEM-LBK UUID to the same he_lbk_ops. */
+struct afu_mf_drv he_mem_lbk_drv = {
+	.uuid = { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+	.ops = &he_lbk_ops
+};
diff --git a/drivers/raw/afu_mf/he_lbk.h b/drivers/raw/afu_mf/he_lbk.h
new file mode 100644
index 0000000..c2e8a29
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_LBK_H_
+#define _HE_LBK_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/* Low (_L) and high (_H) 64-bit halves of the HE-LBK / HE-MEM-LBK UUIDs */
+#define HE_LBK_UUID_L      0xb94b12284c31e02b
+#define HE_LBK_UUID_H      0x56e203e9864f49a7
+#define HE_MEM_LBK_UUID_L  0xbb652a578330a8eb
+#define HE_MEM_LBK_UUID_H  0x8568ab4e6ba54616
+
+/* Driver objects defined in he_lbk.c */
+extern struct afu_mf_drv he_lbk_drv;
+extern struct afu_mf_drv he_mem_lbk_drv;
+
+/*
+ * HE-LBK & HE-MEM-LBK register definitions.
+ * NOTE(review): presumably byte offsets from the mapped AFU base address --
+ * confirm against he_lbk.c.
+ */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+/* NOTE(review): DSM_SIZE is presumably the DSM buffer size in bytes */
+#define DSM_SIZE           0x200000
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+/* NOTE(review): presumably test buffer size and alignment in bytes */
+#define NLB_BUF_SIZE  0x400000
+#define TEST_MEM_ALIGN  1024
+
+/*
+ * 32-bit control register image: csr is the raw value, the anonymous
+ * struct overlays it with bitfields (1 + 1 + 1 + 29 = 32 bits).
+ * NOTE(review): presumably the value written to CSR_CTL -- confirm.
+ */
+struct he_lbk_csr_ctl {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t reset:1;
+			uint32_t start:1;
+			uint32_t force_completion:1;
+			uint32_t reserved:29;
+		};
+	};
+};
+
+/*
+ * 32-bit configuration register image: csr is the raw value, the
+ * anonymous struct overlays it with bitfields that add up to 32 bits.
+ * NOTE(review): presumably the value written to CSR_CFG -- confirm.
+ */
+struct he_lbk_csr_cfg {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t rsvd1:1;
+			uint32_t cont:1;
+			uint32_t mode:3;
+			uint32_t multicl_len:2;
+			uint32_t rsvd2:13;
+			uint32_t trput_interleave:3;
+			uint32_t test_cfg:5;
+			uint32_t interrupt_on_error:1;
+			uint32_t interrupt_testmode:1;
+			uint32_t rsvd3:2;
+		};
+	};
+};
+
+/*
+ * 64-bit status image: csr is the raw value, overlaid by two 32-bit
+ * counters (num_writes first, then num_reads).
+ * NOTE(review): presumably read from CSR_STATUS0 -- confirm.
+ */
+struct he_lbk_status0 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_writes;
+			uint32_t num_reads;
+		};
+	};
+};
+
+/*
+ * 64-bit status image: csr is the raw value, overlaid by two 32-bit
+ * counters of pending writes and pending reads.
+ * NOTE(review): presumably read from CSR_STATUS1 -- confirm.
+ */
+struct he_lbk_status1 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_pend_writes;
+			uint32_t num_pend_reads;
+		};
+	};
+};
+
+/*
+ * Test status record (completion flag, error flag, clock and access
+ * counters, start/end overhead).
+ * NOTE(review): presumably the layout the AFU writes into the DSM buffer
+ * that he_lbk_ctx.status_ptr points to -- confirm.
+ */
+struct he_lbk_dsm_status {
+	uint32_t test_complete;
+	uint32_t test_error;
+	uint64_t num_clocks;
+	uint32_t num_reads;
+	uint32_t num_writes;
+	uint32_t start_overhead;
+	uint32_t end_overhead;
+};
+
+/*
+ * Run-time context of an HE-LBK device; he_lbk_dump() prints every field.
+ * NOTE(review): the *_ptr / *_iova pairs are presumably the virtual and
+ * IO addresses of the same DSM, source and destination buffers -- confirm.
+ */
+struct he_lbk_ctx {
+	uint8_t *addr;
+	uint8_t *dsm_ptr;
+	uint64_t dsm_iova;
+	uint8_t *src_ptr;
+	uint64_t src_iova;
+	uint8_t *dest_ptr;
+	uint64_t dest_iova;
+	struct he_lbk_dsm_status *status_ptr;
+};
+
+/*
+ * Private data hung off afu_mf_rawdev.priv for HE-LBK devices:
+ * he_lbk_cfg receives the configuration copied in by he_lbk_config(),
+ * he_lbk_ctx holds the run-time context.
+ */
+struct he_lbk_priv {
+	struct rte_pmd_afu_he_lbk_cfg he_lbk_cfg;
+	struct he_lbk_ctx he_lbk_ctx;
+};
+
+#endif /* _HE_LBK_H_ */
diff --git a/drivers/raw/afu_mf/he_mem.c b/drivers/raw/afu_mf/he_mem.c
new file mode 100644
index 0000000..ccbb3a8
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_mem.h"
+
+/*
+ * Run the memory traffic generator test.
+ *
+ * The scratchpad register is checked with a write/read-back first. The
+ * configured channel mask is then ANDed with the value read from
+ * MEM_TG_CTRL, written back to MEM_TG_CTRL, and MEM_TG_STAT is polled
+ * until every selected channel is no longer active or MEM_TG_TIMEOUT_MS
+ * has elapsed. The result of each finished channel is printed.
+ *
+ * Returns 0 when all selected channels finished, -EINVAL when dev is
+ * NULL, -ENOENT when the device has no private data, -EIO when the
+ * scratchpad check fails, and the mask of still-active channels on
+ * timeout.
+ */
+static int he_mem_tg_test(struct afu_mf_rawdev *dev)
+{
+	struct he_mem_tg_priv *priv = NULL;
+	struct rte_pmd_afu_he_mem_tg_cfg *cfg = NULL;
+	struct he_mem_tg_ctx *ctx = NULL;
+	uint64_t value = 0x12345678;
+	uint64_t cap = 0;
+	uint64_t pending = 0;
+	int ch, elapsed;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	cfg = &priv->he_mem_tg_cfg;
+	ctx = &priv->he_mem_tg_ctx;
+
+	AFU_MF_PMD_DEBUG("Channel mask: 0x%x", cfg->channel_mask);
+
+	/* scratchpad write/read-back check */
+	rte_write64(value, ctx->addr + MEM_TG_SCRATCHPAD);
+	cap = rte_read64(ctx->addr + MEM_TG_SCRATCHPAD);
+	AFU_MF_PMD_DEBUG("Scratchpad value: 0x%"PRIx64, cap);
+	if (cap != value) {
+		AFU_MF_PMD_ERR("Test scratchpad register failed");
+		return -EIO;
+	}
+
+	cap = rte_read64(ctx->addr + MEM_TG_CTRL);
+	AFU_MF_PMD_DEBUG("Capability: 0x%"PRIx64, cap);
+
+	/* start only the requested channels that the hardware reports */
+	pending = cfg->channel_mask & cap;
+	rte_write64(pending, ctx->addr + MEM_TG_CTRL);
+
+	/* poll until every started channel has reported a result */
+	for (elapsed = 0; elapsed < MEM_TG_TIMEOUT_MS;
+			elapsed += MEM_TG_POLL_INTERVAL_MS) {
+		value = rte_read64(ctx->addr + MEM_TG_STAT);
+		for (ch = 0; ch < NUM_MEM_TG_CHANNELS; ch++) {
+			if (!(pending & (1 << ch)))
+				continue;
+			if (TGACTIVE(value, ch))
+				continue;
+			printf("TG channel %d test %s\n", ch,
+				TGPASS(value, ch) ? "pass" :
+				TGTIMEOUT(value, ch) ? "timeout" :
+				TGFAIL(value, ch) ? "fail" : "error");
+			pending &= ~(1 << ch);
+		}
+		if (pending == 0)
+			break;
+		rte_delay_ms(MEM_TG_POLL_INTERVAL_MS);
+	}
+
+	if (pending != 0) {
+		AFU_MF_PMD_ERR("Timeout 0x%04lx", (unsigned long)value);
+		return pending;
+	}
+
+	return 0;
+}
+
+/*
+ * Initialize a MEM-TG device: allocate zeroed private data when the
+ * device has none yet and record the device address in the context.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL and -ENOMEM when the
+ * allocation fails.
+ */
+static int he_mem_tg_init(struct afu_mf_rawdev *dev)
+{
+	struct he_mem_tg_priv *priv;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (priv == NULL) {
+		/* first init: private data does not exist yet */
+		priv = rte_zmalloc(NULL, sizeof(struct he_mem_tg_priv), 0);
+		if (priv == NULL)
+			return -ENOMEM;
+		dev->priv = priv;
+	}
+
+	priv->he_mem_tg_ctx.addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+/*
+ * Store a new MEM-TG configuration in the device private data.
+ *
+ * config must point to a struct rte_pmd_afu_he_mem_tg_cfg and config_size
+ * must equal its size.
+ *
+ * Returns 0 on success, -EINVAL on a NULL/zero argument or a size
+ * mismatch, -ENOENT when the device has no private data.
+ */
+static int he_mem_tg_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_mem_tg_priv *priv;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_mem_tg_cfg))
+		return -EINVAL;
+
+	rte_memcpy(&priv->he_mem_tg_cfg, config, sizeof(priv->he_mem_tg_cfg));
+
+	return 0;
+}
+
+/*
+ * Close a MEM-TG device by freeing and clearing its private data.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL.
+ */
+static int he_mem_tg_close(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* clear the pointer so a later init allocates fresh private data */
+	rte_free(dev->priv);
+	dev->priv = NULL;
+
+	return 0;
+}
+
+/*
+ * Print the address stored in the MEM-TG context of dev.
+ *
+ * Output goes to f, or to stdout when f is NULL.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL and -ENOENT when the
+ * device has no private data.
+ */
+static int he_mem_tg_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_mem_tg_priv *priv;
+	FILE *out;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	out = (f != NULL) ? f : stdout;
+
+	fprintf(out, "addr:\t\t%p\n", (void *)priv->he_mem_tg_ctx.addr);
+
+	return 0;
+}
+
+/* Callback table of the MEM-TG driver */
+static struct afu_mf_ops he_mem_tg_ops = {
+	/* callbacks implemented by this driver */
+	.init = he_mem_tg_init,
+	.config = he_mem_tg_config,
+	.test = he_mem_tg_test,
+	.close = he_mem_tg_close,
+	.dump = he_mem_tg_dump,
+	/* callbacks this driver does not provide */
+	.start = NULL,
+	.stop = NULL,
+	.reset = NULL,
+};
+
+/* Driver entry pairing the MEM-TG UUID with the MEM-TG callbacks */
+struct afu_mf_drv he_mem_tg_drv = {
+	.ops = &he_mem_tg_ops,
+	.uuid = { HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+};
diff --git a/drivers/raw/afu_mf/he_mem.h b/drivers/raw/afu_mf/he_mem.h
new file mode 100644
index 0000000..82404b6
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_MEM_H_
+#define _HE_MEM_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/* Low (_L) and high (_H) 64-bit halves of the MEM-TG UUID */
+#define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
+#define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
+
+/* Number of channels scanned by he_mem_tg_test() */
+#define NUM_MEM_TG_CHANNELS      4
+/* Total polling budget and polling period of he_mem_tg_test() */
+#define MEM_TG_TIMEOUT_MS     5000
+#define MEM_TG_POLL_INTERVAL_MS 10
+
+/* Driver object defined in he_mem.c */
+extern struct afu_mf_drv he_mem_tg_drv;
+
+/* MEM-TG register definitions (byte offsets added to the device address) */
+#define MEM_TG_SCRATCHPAD   0x28
+#define MEM_TG_CTRL         0x30
+/* control bit of traffic generator channel n */
+#define   TGCONTROL(n)      (1 << (n))
+#define MEM_TG_STAT         0x38
+/*
+ * Channel n owns the 4-bit field starting at bit (n * 4) of the
+ * MEM_TG_STAT value v: bit 0 active, bit 1 timeout, bit 2 fail,
+ * bit 3 pass.
+ * The channel argument is parenthesized so that an expression such as
+ * (i & 3) is shifted as a whole.
+ */
+#define   TGSTATUS(v, n)    (((v) >> ((n) << 2)) & 0xf)
+#define   TGPASS(v, n)      (((v) >> (((n) << 2) + 3)) & 0x1)
+#define   TGFAIL(v, n)      (((v) >> (((n) << 2) + 2)) & 0x1)
+#define   TGTIMEOUT(v, n)   (((v) >> (((n) << 2) + 1)) & 0x1)
+#define   TGACTIVE(v, n)    (((v) >> ((n) << 2)) & 0x1)
+
+/*
+ * Run-time context of a MEM-TG device. addr is copied from the raw
+ * device's addr by he_mem_tg_init() and used as the base for the
+ * MEM_TG_* register accesses.
+ */
+struct he_mem_tg_ctx {
+	uint8_t *addr;
+};
+
+/*
+ * Private data hung off afu_mf_rawdev.priv for MEM-TG devices:
+ * he_mem_tg_cfg receives the configuration copied in by
+ * he_mem_tg_config(), he_mem_tg_ctx holds the run-time context.
+ */
+struct he_mem_tg_priv {
+	struct rte_pmd_afu_he_mem_tg_cfg he_mem_tg_cfg;
+	struct he_mem_tg_ctx he_mem_tg_ctx;
+};
+
+#endif /* _HE_MEM_H_ */
diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
new file mode 100644
index 0000000..f304bc8
--- /dev/null
+++ b/drivers/raw/afu_mf/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2022 Intel Corporation
+
+# libraries this driver depends on
+deps += ['rawdev', 'bus_pci', 'bus_ifpga']
+# C sources compiled into the driver
+sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c', 'he_mem.c',
+	'he_hssi.c')
+
+# NOTE(review): presumably the public header exposed to applications -- confirm
+headers = files('rte_pmd_afu.h')
diff --git a/drivers/raw/afu_mf/n3000_afu.c b/drivers/raw/afu_mf/n3000_afu.c
new file mode 100644
index 0000000..420e84a
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.c
@@ -0,0 +1,1997 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+
+/*
+ * Build the NLB configuration register value from the stored
+ * rte_pmd_afu_nlb_cfg and write it to CSR_CFG.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL and -ENOENT when the
+ * device has no private data.
+ */
+static int nlb_afu_config(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_csr_cfg v;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->nlb_cfg;
+
+	/* start from an all-zero register image */
+	v.csr = 0;
+
+	if (cfg->cont)
+		v.cont = 1;
+
+	if (cfg->cache_policy == NLB_WRPUSH_I)
+		v.wrpush_i = 1;
+	else
+		v.wrthru_en = cfg->cache_policy;
+
+	if (cfg->cache_hint == NLB_RDLINE_MIXED)
+		v.rdsel = 3;
+	else
+		v.rdsel = cfg->cache_hint;
+
+	v.mode = cfg->mode;
+	v.chsel = cfg->read_vc;
+	v.wr_chsel = cfg->write_vc;
+	v.wrfence_chsel = cfg->wrfence_vc;
+	/*
+	 * NOTE(review): wrthru_en was already set conditionally above; this
+	 * unconditional assignment overrides that choice, including the
+	 * NLB_WRPUSH_I case -- confirm whether it is intended.
+	 */
+	v.wrthru_en = cfg->cache_policy;
+	/* the register field holds multi_cl minus one */
+	v.multicl_len = cfg->multi_cl - 1;
+
+	AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr);
+	rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
+
+	return 0;
+}
+
+/*
+ * Print the read/write counters and the derived bandwidth of the last
+ * NLB run, taken from the status block that nlb_ctx.status_ptr points to.
+ *
+ * cl is the number of cache lines of the run and is only printed.
+ * When no clock frequency is configured (freq_mhz == 0), 200 MHz is
+ * assumed and stored back into the configuration.
+ *
+ * Nothing is printed when dev, its private data or the status block is
+ * missing.
+ */
+static void nlb_afu_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_dsm_status *stat = NULL;
+	uint64_t ticks = 0;
+	double num, rd_bw, wr_bw;
+
+	if (!dev || !dev->priv)
+		return;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+
+	cfg = &priv->nlb_cfg;
+	stat = priv->nlb_ctx.status_ptr;
+	if (!stat)
+		return;
+
+	/* in continuous mode only the start overhead is subtracted */
+	if (cfg->cont)
+		ticks = stat->num_clocks - stat->start_overhead;
+	else
+		ticks = stat->num_clocks -
+			(stat->start_overhead + stat->end_overhead);
+
+	if (cfg->freq_mhz == 0)
+		cfg->freq_mhz = 200;
+
+	num = (double)stat->num_reads;
+	rd_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+	num = (double)stat->num_writes;
+	wr_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+
+	printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
+		"Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
+	/* ticks is uint64_t, so it needs PRIu64 rather than %lu */
+	printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n",
+		cl, stat->num_reads, stat->num_writes, ticks,
+		rd_bw / 1e9, wr_bw / 1e9);
+}
+
+/*
+ * Run the NLB test for every cache line count from cfg->begin to
+ * cfg->end in steps of cfg->multi_cl.
+ *
+ * For each count the destination and DSM buffers are cleared, the AFU is
+ * reset and started, completion is awaited through the DSM status block,
+ * a report is printed and the destination buffer is compared with the
+ * pattern written to the source buffer.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the
+ * device has no private data, or the non-zero result of nlb_afu_config()
+ * or dsm_poll_timeout(). A data mismatch is only logged; it does not
+ * change the return value.
+ */
+static int nlb_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct nlb_afu_ctx *ctx = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	ctx = &priv->nlb_ctx;
+	cfg = &priv->nlb_cfg;
+
+	/* initialize registers */
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
+
+	/* write CSR_CTL as 0, then with the reset bit set */
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->src_iova),
+		ctx->addr + CSR_SRC_ADDR);
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->dest_iova),
+		ctx->addr + CSR_DST_ADDR);
+
+	ret = nlb_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data: every 32-bit word holds its own index */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CACHE_LINE_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		memset(ctx->dest_ptr, 0, CACHE_LINE_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		rte_write32(cl, ctx->addr + CSR_NUM_LINES);
+
+		rte_delay_us(10);
+
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			/*
+			 * continuous mode: wait cfg->timeout * 1000 ms, then
+			 * force completion and poll for the completion flag
+			 */
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+		} else {
+			/* poll for the completion flag, then force completion */
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		nlb_afu_report(dev, cl);
+
+		/* wait up to 100 x 1000 us for CSR_STATUS1 to read zero */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		/* compare destination words with the source pattern */
+		ptr = (uint32_t *)ctx->dest_ptr;
+		j = CACHE_LINE_SIZE(cl) >> 2;
+		for (i = 0; i < j; i++) {
+			if (*ptr++ != i) {
+				/* only logged: ret is left unchanged */
+				AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+				break;
+			}
+		}
+	}
+
+end:
+	return ret;
+}
+
+/*
+ * Free every buffer owned by a DMA context (the DMA bounce buffers, the
+ * data buffer and the reference buffer) and reset the pointers to NULL.
+ * Does nothing when ctx is NULL.
+ */
+static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
+{
+	int idx;
+
+	if (ctx == NULL)
+		return;
+
+	for (idx = 0; idx < NUM_DMA_BUF; idx++) {
+		rte_free(ctx->dma_buf[idx]);
+		ctx->dma_buf[idx] = NULL;
+	}
+
+	rte_free(ctx->data_buf);
+	ctx->data_buf = NULL;
+
+	rte_free(ctx->ref_buf);
+	ctx->ref_buf = NULL;
+}
+
+/*
+ * Allocate the buffers of a DMA context: NUM_DMA_BUF zeroed buffers of
+ * cfg->size bytes (with their IO addresses) plus a data buffer and a
+ * reference buffer of cfg->length bytes each, aligned to the page size.
+ *
+ * On any failure everything allocated so far is released through
+ * dma_afu_buf_free().
+ *
+ * Returns 0 on success, -EINVAL when ctx or cfg is NULL and -ENOMEM when
+ * an allocation or an address translation fails.
+ */
+static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
+	struct rte_pmd_afu_dma_cfg *cfg)
+{
+	size_t page_sz = sysconf(_SC_PAGE_SIZE);
+	int idx;
+
+	if (ctx == NULL || cfg == NULL)
+		return -EINVAL;
+
+	for (idx = 0; idx < NUM_DMA_BUF; idx++) {
+		ctx->dma_buf[idx] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
+			TEST_MEM_ALIGN);
+		if (ctx->dma_buf[idx] == NULL)
+			goto nomem;
+
+		ctx->dma_iova[idx] = rte_malloc_virt2iova(ctx->dma_buf[idx]);
+		if (ctx->dma_iova[idx] == RTE_BAD_IOVA)
+			goto nomem;
+	}
+
+	ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz);
+	if (ctx->data_buf == NULL)
+		goto nomem;
+
+	ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz);
+	if (ctx->ref_buf == NULL)
+		goto nomem;
+
+	return 0;
+
+nomem:
+	dma_afu_buf_free(ctx);
+	return -ENOMEM;
+}
+
+/*
+ * Fill the first size bytes of the reference buffer and copy them to the
+ * data buffer.
+ *
+ * With a non-zero ctx->pattern the buffer is filled with memset();
+ * otherwise it is filled with rand() values from a fixed seed (99), one
+ * per 32-bit word.
+ * Does nothing when ctx is NULL or size is 0.
+ */
+static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
+{
+	int *words;
+	size_t n_words;
+	size_t idx;
+
+	if (ctx == NULL || size == 0)
+		return;
+
+	words = (int *)ctx->ref_buf;
+
+	if (!ctx->pattern) {
+		/* fixed seed keeps the pseudo-random data reproducible */
+		srand(99);
+		n_words = size >> 2;
+		for (idx = 0; idx < n_words; idx++)
+			words[idx] = rand();
+	} else {
+		memset(words, ctx->pattern, size);
+	}
+
+	rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
+}
+
+/*
+ * Compare the first size bytes of the data buffer with the reference
+ * buffer and print the verdict.
+ *
+ * In verbose mode the mismatching bytes are listed until
+ * ERR_CHECK_LIMIT errors have been counted.
+ *
+ * Returns 0 when the buffers match, -1 when they differ and -EINVAL when
+ * ctx is NULL or size is 0.
+ */
+static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
+{
+	uint8_t *expect;
+	uint8_t *actual;
+	size_t pos;
+	int errs = 0;
+
+	if (ctx == NULL || size == 0)
+		return -EINVAL;
+
+	expect = (uint8_t *)ctx->ref_buf;
+	actual = (uint8_t *)ctx->data_buf;
+
+	if (memcmp(expect, actual, size) == 0) {
+		printf("Transfer is verified\n");
+		return 0;
+	}
+
+	printf("Transfer is corrupted\n");
+	if (!ctx->verbose)
+		return -1;
+
+	for (pos = 0; pos < size; pos++) {
+		if (expect[pos] == actual[pos])
+			continue;
+		/* stop listing once the error limit is reached */
+		if (++errs >= ERR_CHECK_LIMIT)
+			break;
+		printf("Mismatch at 0x%zx, "
+			"Expected %02x  Actual %02x\n",
+			pos, expect[pos], actual[pos]);
+	}
+
+	if (errs < ERR_CHECK_LIMIT) {
+		printf("Found %d error bytes\n", errs);
+	} else {
+		printf("......\n");
+		printf("Found more than %d error bytes\n", errs);
+	}
+
+	return -1;
+}
+
+/*
+ * Copy bytes from host memory to the device with 64-bit register
+ * writes. Nothing is written unless both dev_addr and bytes are QWORD
+ * aligned.
+ */
+static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+	uint64_t total = bytes / sizeof(uint64_t);
+	uint64_t idx;
+
+	if (!(IS_ALIGNED_QWORD((uint64_t)dev_addr) &&
+		IS_ALIGNED_QWORD((uint64_t)bytes)))
+		return;
+
+	for (idx = 0; idx < total; idx++)
+		rte_write64(host_addr[idx], &dev_addr[idx]);
+}
+
+/*
+ * Copy bytes from the device to host memory with 64-bit register reads.
+ * Nothing is read unless both dev_addr and bytes are QWORD aligned.
+ */
+static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+	uint64_t total = bytes / sizeof(uint64_t);
+	uint64_t idx;
+
+	if (!(IS_ALIGNED_QWORD((uint64_t)dev_addr) &&
+		IS_ALIGNED_QWORD((uint64_t)bytes)))
+		return;
+
+	for (idx = 0; idx < total; idx++)
+		host_addr[idx] = rte_read64(&dev_addr[idx]);
+}
+
+/*
+ * Select the ASE page that contains addr.
+ *
+ * The control register is only written when the page differs from the
+ * one cached in ctx->cur_ase_page. Does nothing when ctx is NULL.
+ */
+static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
+{
+	uint64_t page;
+
+	if (ctx == NULL)
+		return;
+
+	page = addr & ~DMA_ASE_WINDOW_MASK;
+	if (page == ctx->cur_ase_page)
+		return;
+
+	rte_write64(page, ctx->ase_ctrl_addr);
+	ctx->cur_ase_page = page;
+}
+
+/*
+ * Write fewer than QWORD_BYTES bytes from host_addr to device address
+ * dev_addr through the ASE window with a read-modify-write of the
+ * QWORD that contains dev_addr.
+ *
+ * Returns 0 on success (including count == 0) and -EINVAL when ctx is
+ * NULL or count is QWORD_BYTES or more.
+ */
+static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+	uint64_t host_addr, uint32_t count)
+{
+	uint64_t byte_off;
+	uint64_t win_off;
+	uint64_t qword;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr,
+		dev_addr, count);
+
+	if (ctx == NULL || count >= QWORD_BYTES)
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	switch_ase_page(ctx, dev_addr);
+
+	/* locate the containing QWORD inside the window */
+	byte_off = dev_addr % QWORD_BYTES;
+	win_off = (dev_addr - byte_off) & DMA_ASE_WINDOW_MASK;
+
+	/* merge the new bytes into the current device value */
+	qword = rte_read64(ctx->ase_data_addr + win_off);
+	rte_memcpy(((char *)(&qword)) + byte_off, (void *)host_addr, count);
+
+	/* write back to device */
+	rte_write64(qword, ctx->ase_data_addr + win_off);
+
+	return 0;
+}
+
+/*
+ * Write *count bytes from host address *src_ptr to device address
+ * *dst_ptr through the ASE window using 32-bit and 64-bit register
+ * writes: an optional leading DWORD to reach QWORD alignment, blocks of
+ * QWORDs, and an optional trailing DWORD.
+ *
+ * On the normal exit path *src_ptr, *dst_ptr and *count are advanced
+ * past the bytes written; fewer than DWORD_BYTES bytes may remain in
+ * *count. The two early "return 0" paths leave them untouched.
+ *
+ * Returns 0 on success and -EINVAL when ctx is NULL or the destination
+ * is not DWORD aligned.
+ */
+static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+	uint64_t *src_ptr, uint64_t *count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t align_bytes = *count;
+	uint64_t offset = 0;
+	uint64_t left_in_page = DMA_ASE_WINDOW;
+	uint64_t size_to_copy = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		align_bytes);
+
+	if (!ctx || !IS_ALIGNED_DWORD(dst))
+		return -EINVAL;
+
+	if (align_bytes < DWORD_BYTES)
+		return 0;
+
+	if (!IS_ALIGNED_QWORD(dst)) {
+		/* Write out a single DWORD to get QWORD aligned */
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+
+		rte_write32(*(uint32_t *)src, ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/*
+	 * NOTE(review): when the leading DWORD consumed everything, this
+	 * returns without updating *src_ptr, *dst_ptr and *count -- confirm
+	 * that callers cope with the stale values.
+	 */
+	if (!align_bytes)
+		return 0;
+
+	/* Write out blocks of 64-bit values */
+	while (align_bytes >= QWORD_BYTES) {
+		/*
+		 * NOTE(review): left_in_page is never reset to
+		 * DMA_ASE_WINDOW, so it keeps shrinking across iterations --
+		 * confirm this is intended for transfers spanning pages.
+		 */
+		left_in_page -= dst & DMA_ASE_WINDOW_MASK;
+		size_to_copy =
+			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+		if (size_to_copy < QWORD_BYTES)
+			break;
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+		blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
+			(uint64_t *)src, size_to_copy);
+		src += size_to_copy;
+		dst += size_to_copy;
+		align_bytes -= size_to_copy;
+	}
+
+	if (align_bytes >= DWORD_BYTES) {
+		/* Write out remaining DWORD */
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+		rte_write32(*(uint32_t *)src, ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/* report progress back to the caller */
+	*src_ptr = src;
+	*dst_ptr = dst;
+	*count = align_bytes;
+
+	return 0;
+}
+
+/*
+ * Copy count bytes from host address *src_ptr to device address
+ * *dst_ptr through the ASE window.
+ *
+ * The transfer has three stages: a masked write of the unaligned head,
+ * DWORD/QWORD writes through ase_write(), and a masked write of the
+ * unaligned tail. On success *dst_ptr and *src_ptr are advanced past the
+ * bytes handled.
+ *
+ * Returns 0 on success or the error of the failing stage.
+ */
+static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+	uint64_t *src_ptr, uint64_t count)
+{
+	uint64_t to = *dst_ptr;
+	uint64_t from = *src_ptr;
+	uint64_t remaining = count;
+	uint64_t head = 0;
+	uint64_t tail = 0;
+	int ret;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", from, to,
+		count);
+
+	/* aligns address to 8 byte using dst masking method */
+	if (!(IS_ALIGNED_DWORD(to) || IS_ALIGNED_QWORD(to))) {
+		head = QWORD_BYTES - (to % QWORD_BYTES);
+		if (head > remaining)
+			head = remaining;
+		ret = ase_write_unaligned(ctx, to, from, head);
+		if (ret)
+			return ret;
+		remaining -= head;
+		from += head;
+		to += head;
+	}
+
+	/* Handles 8/4 byte MMIO transfer */
+	ret = ase_write(ctx, &to, &from, &remaining);
+	if (ret)
+		return ret;
+
+	/* Left over unaligned bytes transferred using dst masking method */
+	tail = QWORD_BYTES - (to % QWORD_BYTES);
+	if (tail > remaining)
+		tail = remaining;
+
+	ret = ase_write_unaligned(ctx, to, from, tail);
+	if (ret)
+		return ret;
+
+	*dst_ptr = to + tail;
+	*src_ptr = from + tail;
+
+	return 0;
+}
+
+/*
+ * Read fewer than QWORD_BYTES bytes from device address dev_addr into
+ * host_addr through the ASE window by reading the QWORD that contains
+ * dev_addr and copying the requested bytes out of it.
+ *
+ * Returns 0 on success (including count == 0) and -EINVAL when ctx is
+ * NULL or count is QWORD_BYTES or more.
+ */
+static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+	uint64_t host_addr, uint32_t count)
+{
+	uint64_t byte_off;
+	uint64_t win_off;
+	uint64_t qword;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr,
+		dev_addr, count);
+
+	if (ctx == NULL || count >= QWORD_BYTES)
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	switch_ase_page(ctx, dev_addr);
+
+	/* locate the containing QWORD inside the window */
+	byte_off = dev_addr % QWORD_BYTES;
+	win_off = (dev_addr - byte_off) & DMA_ASE_WINDOW_MASK;
+
+	qword = rte_read64(ctx->ase_data_addr + win_off);
+	rte_memcpy((void *)host_addr, ((char *)(&qword)) + byte_off, count);
+
+	return 0;
+}
+
+/*
+ * Read *count bytes from device address *src_ptr into host address
+ * *dst_ptr through the ASE window using 32-bit and 64-bit register
+ * reads: an optional leading DWORD to reach QWORD alignment, blocks of
+ * QWORDs, and an optional trailing DWORD.
+ *
+ * On the normal exit path *src_ptr, *dst_ptr and *count are advanced
+ * past the bytes read; fewer than DWORD_BYTES bytes may remain in
+ * *count. The two early "return 0" paths leave them untouched.
+ *
+ * Returns 0 on success and -EINVAL when ctx is NULL or the source is
+ * not DWORD aligned.
+ */
+static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+	uint64_t *dst_ptr, uint64_t *count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t align_bytes = *count;
+	uint64_t offset = 0;
+	uint64_t left_in_page = DMA_ASE_WINDOW;
+	uint64_t size_to_copy = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")", dst, src,
+		align_bytes);
+
+	if (!ctx || !IS_ALIGNED_DWORD(src))
+		return -EINVAL;
+
+	if (align_bytes < DWORD_BYTES)
+		return 0;
+
+	if (!IS_ALIGNED_QWORD(src)) {
+		/* Read a single DWORD to get QWORD aligned */
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		*(uint32_t *)dst = rte_read32(ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/*
+	 * NOTE(review): when the leading DWORD consumed everything, this
+	 * returns without updating *src_ptr, *dst_ptr and *count -- confirm
+	 * that callers cope with the stale values.
+	 */
+	if (!align_bytes)
+		return 0;
+
+	/* Read blocks of 64-bit values */
+	while (align_bytes >= QWORD_BYTES) {
+		/*
+		 * NOTE(review): left_in_page is never reset to
+		 * DMA_ASE_WINDOW, so it keeps shrinking across iterations --
+		 * confirm this is intended for transfers spanning pages.
+		 */
+		left_in_page -= src & DMA_ASE_WINDOW_MASK;
+		size_to_copy =
+			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+		if (size_to_copy < QWORD_BYTES)
+			break;
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
+			(uint64_t *)dst, size_to_copy);
+		src += size_to_copy;
+		dst += size_to_copy;
+		align_bytes -= size_to_copy;
+	}
+
+	if (align_bytes >= DWORD_BYTES) {
+		/* Read remaining DWORD */
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		*(uint32_t *)dst = rte_read32(ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/* report progress back to the caller */
+	*src_ptr = src;
+	*dst_ptr = dst;
+	*count = align_bytes;
+
+	return 0;
+}
+
+/*
+ * Copy count bytes from device address *src_ptr to host address
+ * *dst_ptr through the ASE window.
+ *
+ * The transfer has three stages: a masked read of the unaligned head,
+ * DWORD/QWORD reads through ase_read(), and a masked read of the
+ * unaligned tail. On success *dst_ptr and *src_ptr are advanced past the
+ * bytes handled.
+ *
+ * Returns 0 on success or the error of the failing stage.
+ */
+static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+	uint64_t *dst_ptr, uint64_t count)
+{
+	uint64_t from = *src_ptr;
+	uint64_t to = *dst_ptr;
+	uint64_t remaining = count;
+	uint64_t head = 0;
+	uint64_t tail = 0;
+	int ret;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", from, to,
+		count);
+
+	/* Aligns address to 8 byte using src masking method */
+	if (!(IS_ALIGNED_DWORD(from) || IS_ALIGNED_QWORD(from))) {
+		head = QWORD_BYTES - (from % QWORD_BYTES);
+		if (head > remaining)
+			head = remaining;
+		ret = ase_read_unaligned(ctx, from, to, head);
+		if (ret)
+			return ret;
+		remaining -= head;
+		to += head;
+		from += head;
+	}
+
+	/* Handles 8/4 byte MMIO transfer */
+	ret = ase_read(ctx, &from, &to, &remaining);
+	if (ret)
+		return ret;
+
+	/* Left over unaligned bytes transferred using src masking method */
+	tail = QWORD_BYTES - (from % QWORD_BYTES);
+	if (tail > remaining)
+		tail = remaining;
+
+	ret = ase_read_unaligned(ctx, from, to, tail);
+	if (ret)
+		return ret;
+
+	*dst_ptr = to + tail;
+	*src_ptr = from + tail;
+
+	return 0;
+}
+
+/*
+ * Clear the DMA interrupt by writing a status value with only the IRQ
+ * bit set to the status register. Does nothing when ctx is NULL.
+ */
+static void clear_interrupt(struct dma_afu_ctx *ctx)
+{
+	msgdma_status irq_ack;
+
+	if (ctx == NULL)
+		return;
+
+	/* writing 1 to the IRQ bit clears the interrupt */
+	irq_ack.csr = 0;
+	irq_ack.irq = 1;
+	rte_write32(irq_ack.csr, CSR_STATUS(ctx->csr_addr));
+}
+
+/*
+ * Wait up to DMA_TIMEOUT_MSEC for the DMA interrupt eventfd to become
+ * readable, consume the event counter and clear the interrupt in the
+ * device.
+ *
+ * The interrupt is cleared on every path past the argument check,
+ * including errors and timeouts.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL or has no event fd,
+ * -EFAULT when poll() fails, -ETIME on timeout and -EIO when the
+ * eventfd cannot be read.
+ */
+static int poll_interrupt(struct dma_afu_ctx *ctx)
+{
+	struct pollfd pfd = {0};
+	uint64_t count = 0;
+	ssize_t bytes_read = 0;
+	int poll_ret = 0;
+	int ret = 0;
+
+	if (!ctx || (ctx->event_fd < 0))
+		return -EINVAL;
+
+	pfd.fd = ctx->event_fd;
+	pfd.events = POLLIN;
+	poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
+	if (poll_ret < 0) {
+		AFU_MF_PMD_ERR("Error %s", strerror(errno));
+		ret = -EFAULT;
+	} else if (poll_ret == 0) {
+		AFU_MF_PMD_ERR("Timeout");
+		ret = -ETIME;
+	} else {
+		bytes_read = read(pfd.fd, &count, sizeof(count));
+		if (bytes_read > 0) {
+			if (ctx->verbose)
+				AFU_MF_PMD_DEBUG("Successful, ret %d, cnt %"PRIu64,
+					poll_ret, count);
+			ret = 0;
+		} else {
+			/*
+			 * read() reports an error with a negative result and
+			 * sets errno; a zero result means nothing was read.
+			 */
+			AFU_MF_PMD_ERR("Failed %s", bytes_read < 0 ?
+				strerror(errno) : "zero bytes read");
+			ret = -EIO;
+		}
+	}
+
+	clear_interrupt(ctx);
+	return ret;
+}
+
+/*
+ * Hand one descriptor to the DMA engine.
+ *
+ * The status register is polled until the descriptor buffer is no longer
+ * reported full (there is no timeout; a debug message is logged after
+ * roughly every 100000000 polls), then the descriptor is copied to
+ * ctx->desc_addr with 64-bit writes. Does nothing when ctx is NULL.
+ */
+static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc)
+{
+	msgdma_status status;
+	uint64_t polls = 0;
+
+	if (ctx == NULL)
+		return;
+
+	if (ctx->verbose) {
+		AFU_MF_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
+			desc->rd_address_ext, desc->rd_address);
+		AFU_MF_PMD_DEBUG("descriptor.wr_address = 0x%x%08x",
+			desc->wr_address_ext, desc->wr_address);
+		AFU_MF_PMD_DEBUG("descriptor.len = %u", desc->len);
+		AFU_MF_PMD_DEBUG("descriptor.wr_burst_count = %u",
+			desc->wr_burst_count);
+		AFU_MF_PMD_DEBUG("descriptor.rd_burst_count = %u",
+			desc->rd_burst_count);
+		AFU_MF_PMD_DEBUG("descriptor.wr_stride %u", desc->wr_stride);
+		AFU_MF_PMD_DEBUG("descriptor.rd_stride %u", desc->rd_stride);
+	}
+
+	/* wait for room in the descriptor buffer */
+	for (;;) {
+		status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
+		if (polls++ > 100000000) {
+			AFU_MF_PMD_DEBUG("DMA queue full retry");
+			polls = 0;
+		}
+		if (!status.desc_buf_full)
+			break;
+	}
+
+	blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
+		sizeof(*desc));
+}
+
+/*
+ * Post the descriptors needed to move count bytes from src to dst.
+ *
+ * FPGA_TO_FPGA transfers use a single descriptor. For the other types
+ * the host-side address (src for HOST_TO_FPGA, otherwise dst) is brought
+ * to a CCIP_ALIGN_BYTES boundary with a short burst-1 transfer, the
+ * aligned bulk is sent with burst 4, and any remainder is sent with a
+ * final burst-1 transfer.
+ *
+ * intr_en requests an interrupt; when a transfer is split, the
+ * interrupt is disabled on the non-final segments. When is_last_desc is
+ * zero, early_done_en is set in the descriptor control word.
+ *
+ * Returns 0 on success and -EINVAL when ctx is NULL or src, dst or
+ * count is not DMA aligned.
+ */
+static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	int count, int is_last_desc, fpga_dma_type type, int intr_en)
+{
+	msgdma_ext_desc *desc = NULL;
+	int alignment_offset = 0;
+	int segment_size = 0;
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* src, dst and count must be 64-byte aligned */
+	if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
+		!IS_DMA_ALIGNED(count))
+		return -EINVAL;
+	memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
+
+	/* these fields are fixed for all DMA transfers */
+	desc = ctx->desc_buf;
+	desc->seq_num = 0;
+	desc->wr_stride = 1;
+	desc->rd_stride = 1;
+	desc->control.go = 1;
+	if (intr_en)
+		desc->control.transfer_irq_en = 1;
+	else
+		desc->control.transfer_irq_en = 0;
+
+	if (!is_last_desc)
+		desc->control.early_done_en = 1;
+	else
+		desc->control.early_done_en = 0;
+
+	if (type == FPGA_TO_FPGA) {
+		/* single descriptor, addresses split into low/high 32 bits */
+		desc->rd_address = src & DMA_MASK_32_BIT;
+		desc->wr_address = dst & DMA_MASK_32_BIT;
+		desc->len = count;
+		desc->wr_burst_count = 4;
+		desc->rd_burst_count = 4;
+		desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+		desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+		send_descriptor(ctx, desc);
+	} else {
+		/* check CCIP (host) address is aligned to 4CL (256B) */
+		alignment_offset = (type == HOST_TO_FPGA)
+			? (src % CCIP_ALIGN_BYTES) : (dst % CCIP_ALIGN_BYTES);
+		/* performing a short transfer to get aligned */
+		if (alignment_offset != 0) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->wr_burst_count = 1;
+			desc->rd_burst_count = 1;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			/* count isn't large enough to hit next 4CL boundary */
+			if ((CCIP_ALIGN_BYTES - alignment_offset) >= count) {
+				segment_size = count;
+				count = 0;
+			} else {
+				segment_size = CCIP_ALIGN_BYTES
+					- alignment_offset;
+				src += segment_size;
+				dst += segment_size;
+				count -= segment_size;
+				/* more segments follow: no interrupt yet */
+				desc->control.transfer_irq_en = 0;
+			}
+			/* post short transfer to align to a 4CL (256 byte) */
+			desc->len = segment_size;
+			send_descriptor(ctx, desc);
+		}
+		/* at this point we are 4CL (256 byte) aligned */
+		if (count >= CCIP_ALIGN_BYTES) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->wr_burst_count = 4;
+			desc->rd_burst_count = 4;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			/* buffer ends on 4CL boundary */
+			if ((count % CCIP_ALIGN_BYTES) == 0) {
+				segment_size = count;
+				count = 0;
+			} else {
+				segment_size = count
+					- (count % CCIP_ALIGN_BYTES);
+				src += segment_size;
+				dst += segment_size;
+				count -= segment_size;
+				/* a remainder follows: no interrupt yet */
+				desc->control.transfer_irq_en = 0;
+			}
+			desc->len = segment_size;
+			send_descriptor(ctx, desc);
+		}
+		/* post short transfer to handle the remainder */
+		if (count > 0) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->len = count;
+			desc->wr_burst_count = 1;
+			desc->rd_burst_count = 1;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			/* final segment: restore the requested interrupt */
+			if (intr_en)
+				desc->control.transfer_irq_en = 1;
+			send_descriptor(ctx, desc);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Clear the host magic word and queue a 64-byte FPGA_TO_HOST transfer
+ * from DMA_WF_MAGIC_ROM into it, with the interrupt enabled.
+ *
+ * Returns the result of do_dma(), or -EINVAL when ctx or its magic
+ * buffer is NULL.
+ */
+static int issue_magic(struct dma_afu_ctx *ctx)
+{
+	/* the magic buffer is dereferenced below, so both must be valid */
+	if (!ctx || !ctx->magic_buf)
+		return -EINVAL;
+
+	*(ctx->magic_buf) = 0ULL;
+	return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
+		DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
+}
+
+/*
+ * Wait for the magic word transfer to land in host memory.
+ *
+ * After polling the interrupt (its result is not checked), the magic
+ * buffer is re-read until it holds DMA_WF_MAGIC or about 1000 reads
+ * have failed, in which case an error is logged. The buffer is cleared
+ * afterwards. Does nothing when ctx is NULL.
+ */
+static void wait_magic(struct dma_afu_ctx *ctx)
+{
+	int tries = 0;
+
+	if (ctx == NULL)
+		return;
+
+	poll_interrupt(ctx);
+
+	for (;;) {
+		if (*(ctx->magic_buf) == DMA_WF_MAGIC)
+			break;
+		if (tries++ > 1000) {
+			AFU_MF_PMD_ERR("DMA magic operation timeout");
+			break;
+		}
+	}
+
+	*(ctx->magic_buf) = 0ULL;
+}
+
+/*
+ * Send chunk number chunk of a host-to-FPGA transfer.
+ *
+ * The chunk is copied from host memory into bounce buffer
+ * (chunk % NUM_DMA_BUF) and a DMA of ctx->dma_buf_size bytes is queued
+ * for it. An interrupt is requested on the last chunk and on the last
+ * chunk of every HALF_DMA_BUF group; before requesting one, a
+ * previously requested interrupt is waited for. *intr_issued is set
+ * once an interrupt has been requested.
+ *
+ * Returns 0 on success, -EINVAL when ctx or intr_issued is NULL, or the
+ * error of poll_interrupt() / do_dma().
+ */
+static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	uint64_t chunk, int is_last_chunk, int *intr_issued)
+{
+	uint64_t slot;
+	int want_irq = 0;
+	int ret;
+
+	if (ctx == NULL || intr_issued == NULL)
+		return -EINVAL;
+
+	/* addresses of this chunk within the whole transfer */
+	src += chunk * ctx->dma_buf_size;
+	dst += chunk * ctx->dma_buf_size;
+
+	if (is_last_chunk || ((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1))) {
+		if (*intr_issued) {
+			ret = poll_interrupt(ctx);
+			if (ret)
+				return ret;
+		}
+		want_irq = 1;
+	}
+
+	slot = chunk % NUM_DMA_BUF;
+	rte_memcpy(ctx->dma_buf[slot], (void *)src, ctx->dma_buf_size);
+	ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[slot]),
+			ctx->dma_buf_size, 0, HOST_TO_FPGA, want_irq);
+	if (want_irq)
+		*intr_issued = 1;
+
+	return ret;
+}
+
+/*
+ * Copy count bytes from host address src to FPGA address dst.
+ *
+ * An unaligned head (up to the next DMA_ALIGN_BYTES boundary of dst) is
+ * written through the ASE window. The bulk is sent in chunks of
+ * ctx->dma_buf_size bytes with dma_tx_buf(), a remaining multiple of
+ * DMA_ALIGN_BYTES is sent with one more DMA through bounce buffer 0, and
+ * the final bytes go through the ASE window again.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL, or the error of the
+ * failing step.
+ */
+static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t aligned_addr = 0;
+	uint64_t align_bytes = 0;
+	uint64_t dma_chunks = 0;
+	uint64_t dma_tx_bytes = 0;
+	uint64_t offset = 0;
+	int issued_intr = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	if (!IS_DMA_ALIGNED(dst)) {
+		/* too small to ever reach DMA alignment: ASE does it all */
+		if (count_left < DMA_ALIGN_BYTES)
+			return ase_host_to_fpga(ctx, &dst, &src, count_left);
+
+		/* ASE write up to the next DMA_ALIGN_BYTES boundary */
+		aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
+			* DMA_ALIGN_BYTES;
+		align_bytes = aligned_addr - dst;
+		/* dst and src are advanced through the pointers */
+		ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
+		if (ret)
+			return ret;
+		count_left = count_left - align_bytes;
+	}
+
+	if (count_left) {
+		/* NOTE(review): assumes ctx->dma_buf_size is non-zero */
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = dma_tx_buf(ctx, dst, src, i,
+				i == (dma_chunks - 1), &issued_intr);
+			if (ret)
+				return ret;
+		}
+
+		/* wait for the last interrupt requested by dma_tx_buf() */
+		if (issued_intr) {
+			ret = poll_interrupt(ctx);
+			if (ret)
+				return ret;
+		}
+
+		if (count_left) {
+			/* whole DMA_ALIGN_BYTES units still fit one DMA */
+			i = count_left / DMA_ALIGN_BYTES;
+			if (i > 0) {
+				dma_tx_bytes = i * DMA_ALIGN_BYTES;
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+					dma_tx_bytes);
+				rte_memcpy(ctx->dma_buf[0],
+					(void *)(src + offset),
+					dma_tx_bytes);
+				ret = do_dma(ctx, dst + offset,
+					DMA_HOST_ADDR(ctx->dma_iova[0]),
+					dma_tx_bytes, 1, HOST_TO_FPGA, 1);
+				if (ret)
+					return ret;
+				ret = poll_interrupt(ctx);
+				if (ret)
+					return ret;
+			}
+
+			/* the rest is smaller than DMA_ALIGN_BYTES: use ASE */
+			count_left -= dma_tx_bytes;
+			if (count_left) {
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+					count_left);
+				dst += offset + dma_tx_bytes;
+				src += offset + dma_tx_bytes;
+				ret = ase_host_to_fpga(ctx, &dst, &src,
+					count_left);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Queue one dma_buf_size chunk of an FPGA-to-host transfer and drain the
+ * bounce buffers when enough chunks are pending.
+ *
+ * Chunk number 'chunk' is read from src + chunk * dma_buf_size into bounce
+ * buffer (chunk % NUM_DMA_BUF). *rx_count is the index of the first chunk
+ * that has not yet been copied out to dst. When HALF_DMA_BUF chunks are
+ * pending a magic transfer is issued; when more than NUM_DMA_BUF - 1 chunks
+ * are pending, or on the last chunk, an outstanding magic is waited for,
+ * HALF_DMA_BUF bounce buffers are copied to dst, and a new magic transfer is
+ * issued.
+ *
+ * *rx_count and *wf_issued are updated in place.
+ * Returns 0 on success, -EINVAL on NULL arguments, or the error reported by
+ * do_dma() / issue_magic().
+ */
+static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
+{
+	uint64_t i = chunk % NUM_DMA_BUF;
+	uint64_t n = 0;
+	uint64_t num_pending = 0;
+	int ret = 0;
+
+	/* validate every pointer before the first dereference */
+	if (!ctx || !rx_count || !wf_issued)
+		return -EINVAL;
+
+	n = *rx_count;
+
+	ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
+		src + chunk * ctx->dma_buf_size,
+		ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
+	if (ret)
+		return ret;
+
+	/* chunks queued so far that have not been copied out to dst */
+	num_pending = chunk - n + 1;
+	if (num_pending == HALF_DMA_BUF) {
+		ret = issue_magic(ctx);
+		if (ret) {
+			AFU_MF_PMD_DEBUG("Magic issue failed");
+			return ret;
+		}
+		*wf_issued = 1;
+	}
+
+	if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
+		if (*wf_issued) {
+			wait_magic(ctx);
+			for (i = 0; i < HALF_DMA_BUF; i++) {
+				rte_memcpy((void *)(dst +
+						n * ctx->dma_buf_size),
+					ctx->dma_buf[n % NUM_DMA_BUF],
+					ctx->dma_buf_size);
+				n++;
+			}
+			*wf_issued = 0;
+			*rx_count = n;
+		}
+		ret = issue_magic(ctx);
+		if (ret) {
+			AFU_MF_PMD_DEBUG("Magic issue failed");
+			return ret;
+		}
+		*wf_issued = 1;
+	}
+
+	return ret;
+}
+
+/*
+ * Copy count bytes from FPGA address src to host virtual address dst.
+ *
+ * The transfer is split into up to four phases:
+ *  1. if src is not DMA_ALIGN_BYTES aligned, the head up to the next aligned
+ *     address (or the whole request when it is shorter than DMA_ALIGN_BYTES)
+ *     goes through ase_fpga_to_host();
+ *  2. whole dma_buf_size chunks go through dma_rx_buf(), and the chunks it
+ *     has not copied out yet are drained from the bounce buffers afterwards;
+ *  3. a remaining multiple of DMA_ALIGN_BYTES is received into dma_buf[0]
+ *     with one more descriptor followed by a magic transfer;
+ *  4. the final bytes go through ase_fpga_to_host().
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL, or the first non-zero value
+ * returned by a helper.
+ */
+static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t aligned_addr = 0;
+	uint64_t align_bytes = 0;
+	uint64_t dma_chunks = 0;
+	uint64_t pending_buf = 0;
+	uint64_t dma_rx_bytes = 0;
+	uint64_t offset = 0;
+	int wf_issued = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	if (!IS_DMA_ALIGNED(src)) {
+		if (count_left < DMA_ALIGN_BYTES)
+			return ase_fpga_to_host(ctx, &src, &dst, count_left);
+
+		/* first DMA_ALIGN_BYTES boundary above src */
+		aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
+			 * DMA_ALIGN_BYTES;
+		align_bytes = aligned_addr - src;
+		/*
+		 * NOTE(review): src and dst are passed by address and used as
+		 * the start of the aligned part below, so ase_fpga_to_host()
+		 * presumably advances both by align_bytes - confirm.
+		 */
+		ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
+		if (ret)
+			return ret;
+		count_left = count_left - align_bytes;
+	}
+
+	if (count_left) {
+		/* offset = bytes covered by whole dma_buf_size chunks */
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		/* pending_buf tracks the first chunk not yet copied to dst */
+		for (i = 0; i < dma_chunks; i++) {
+			ret = dma_rx_buf(ctx, dst, src, i,
+				i == (dma_chunks - 1),
+				&pending_buf, &wf_issued);
+			if (ret)
+				return ret;
+		}
+
+		if (wf_issued)
+			wait_magic(ctx);
+
+		/* clear out final dma memcpy operations */
+		while (pending_buf < dma_chunks) {
+			/* constant size transfer; no length check required */
+			rte_memcpy((void *)(dst +
+					pending_buf * ctx->dma_buf_size),
+				ctx->dma_buf[pending_buf % NUM_DMA_BUF],
+				ctx->dma_buf_size);
+			pending_buf++;
+		}
+
+		if (count_left > 0) {
+			/* i = number of whole DMA_ALIGN_BYTES units left */
+			i = count_left / DMA_ALIGN_BYTES;
+			if (i > 0) {
+				dma_rx_bytes = i * DMA_ALIGN_BYTES;
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+					dma_rx_bytes);
+				ret = do_dma(ctx,
+					DMA_HOST_ADDR(ctx->dma_iova[0]),
+					src + offset,
+					dma_rx_bytes, 1, FPGA_TO_HOST, 0);
+				if (ret)
+					return ret;
+				ret = issue_magic(ctx);
+				if (ret)
+					return ret;
+				wait_magic(ctx);
+				rte_memcpy((void *)(dst + offset),
+					ctx->dma_buf[0], dma_rx_bytes);
+			}
+
+			/* dma_rx_bytes is still 0 when no aligned tail was read */
+			count_left -= dma_rx_bytes;
+			if (count_left) {
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+					count_left);
+				dst += offset + dma_rx_bytes;
+				src += offset + dma_rx_bytes;
+				ret = ase_fpga_to_host(ctx, &src, &dst,
+							count_left);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Copy count bytes from FPGA address src to FPGA address dst.
+ *
+ * When dst, src and count are all DMA_ALIGN_BYTES aligned the data is moved
+ * with FPGA_TO_FPGA descriptors, dma_buf_size bytes at a time, issuing and
+ * waiting for a magic transfer after every NUM_DMA_BUF descriptors and after
+ * the last one. Otherwise the data is bounced through a temporary host
+ * buffer with dma_fpga_to_host() followed by dma_host_to_fpga(); this path
+ * rejects ranges where src < dst and the source range reaches into dst.
+ *
+ * Returns 0 on success, -EINVAL for a NULL ctx or a rejected overlap,
+ * -ENOMEM when the bounce buffer cannot be allocated, or the first non-zero
+ * value returned by a helper.
+ */
+static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t dma_chunks = 0;
+	uint64_t offset = 0;
+	uint32_t tx_chunks = 0;
+	uint64_t *tmp_buf = NULL;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
+	    && IS_DMA_ALIGNED(count_left)) {
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
+				src + i * ctx->dma_buf_size,
+				ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
+			if (ret)
+				return ret;
+			/* fence after each batch and after the final chunk */
+			if ((((i + 1) % NUM_DMA_BUF) == 0) ||
+				(i == (dma_chunks - 1))) {
+				ret = issue_magic(ctx);
+				if (ret)
+					return ret;
+				wait_magic(ctx);
+			}
+		}
+
+		if (count_left > 0) {
+			AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA", count_left);
+			ret = do_dma(ctx, dst + offset, src + offset,
+				count_left, 1, FPGA_TO_FPGA, 0);
+			if (ret)
+				return ret;
+			ret = issue_magic(ctx);
+			if (ret)
+				return ret;
+			wait_magic(ctx);
+		}
+	} else {
+		if ((src < dst) && (src + count_left > dst)) {
+			AFU_MF_PMD_ERR("Overlapping: 0x%"PRIx64
+				" -> 0x%"PRIx64" (0x%"PRIx64")",
+				src, dst, count_left);
+			return -EINVAL;
+		}
+		tx_chunks = count_left / ctx->dma_buf_size;
+		offset = tx_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
+			" (%u...0x%"PRIx64")",
+			src, dst, tx_chunks, count_left);
+		tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
+			DMA_ALIGN_BYTES);
+		/* without this check a NULL buffer would be used as DMA target */
+		if (!tmp_buf) {
+			AFU_MF_PMD_ERR("Failed to allocate bounce buffer");
+			return -ENOMEM;
+		}
+		for (i = 0; i < tx_chunks; i++) {
+			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
+				src + i * ctx->dma_buf_size,
+				ctx->dma_buf_size);
+			if (ret)
+				goto free_buf;
+			ret = dma_host_to_fpga(ctx,
+				dst + i * ctx->dma_buf_size,
+				(uint64_t)tmp_buf, ctx->dma_buf_size);
+			if (ret)
+				goto free_buf;
+		}
+
+		if (count_left > 0) {
+			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
+				src + offset, count_left);
+			if (ret)
+				goto free_buf;
+			ret = dma_host_to_fpga(ctx, dst + offset,
+				(uint64_t)tmp_buf, count_left);
+			if (ret)
+				goto free_buf;
+		}
+free_buf:
+		rte_free(tmp_buf);
+	}
+
+	return ret;
+}
+
+/*
+ * Dispatch a synchronous transfer of count bytes from src to dst to the
+ * routine that matches the requested direction.
+ *
+ * Returns the result of the selected routine, or -EINVAL when ctx is NULL or
+ * type is not one of HOST_TO_FPGA, FPGA_TO_HOST or FPGA_TO_FPGA.
+ */
+static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
+	uint64_t src, size_t count, fpga_dma_type type)
+{
+	if (ctx == NULL)
+		return -EINVAL;
+
+	switch (type) {
+	case HOST_TO_FPGA:
+		return dma_host_to_fpga(ctx, dst, src, count);
+	case FPGA_TO_HOST:
+		return dma_fpga_to_host(ctx, dst, src, count);
+	case FPGA_TO_FPGA:
+		return dma_fpga_to_fpga(ctx, dst, src, count);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Return the time elapsed from start to end, in seconds.
+ *
+ * The difference is computed in 64-bit arithmetic so that the nanosecond
+ * count cannot overflow on targets where long is 32 bits wide.
+ * end is expected to be no earlier than start.
+ */
+static double getTime(struct timespec start, struct timespec end)
+{
+	const int64_t nsec_per_sec = 1000000000;
+	int64_t sec = (int64_t)end.tv_sec - (int64_t)start.tv_sec;
+	int64_t nsec = (int64_t)end.tv_nsec - (int64_t)start.tv_nsec;
+	uint64_t diff = (uint64_t)(sec * nsec_per_sec + nsec);
+
+	return (double)diff / (double)nsec_per_sec;
+}
+
+/* number of timed repetitions of each transfer direction in sweep_test() */
+#define SWEEP_ITERS 1
+/*
+ * Time a host-to-FPGA transfer followed by an FPGA-to-host transfer and
+ * verify the result.
+ *
+ * length         - nominal test length in bytes
+ * ddr_offset     - FPGA-side address of the test window
+ * buf_offset     - byte offset added to ctx->data_buf for the host address
+ * size_decrement - bytes removed from the nominal length
+ *
+ * The number of bytes moved is length - (buf_offset + size_decrement).
+ * Bandwidth for each direction is printed to stdout.
+ *
+ * Returns -EINVAL when the test buffers are missing, when length is smaller
+ * than buf_offset + size_decrement, or when the window exceeds
+ * ctx->mem_size; otherwise the first transfer error or the result of
+ * dma_afu_buf_verify().
+ */
+static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
+	uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
+{
+	struct timespec start, end;
+	uint64_t test_size = 0;
+	uint64_t *dma_buf_ptr = NULL;
+	double throughput, total_time = 0.0;
+	int i = 0;
+	int ret = 0;
+
+	if (!ctx || !ctx->data_buf || !ctx->ref_buf) {
+		AFU_MF_PMD_ERR("Buffer for DMA test is not allocated");
+		return -EINVAL;
+	}
+
+	if (length < (buf_offset + size_decrement)) {
+		AFU_MF_PMD_ERR("Test length does not match unaligned parameter");
+		return -EINVAL;
+	}
+	test_size = length - (buf_offset + size_decrement);
+	if ((ddr_offset + test_size) > ctx->mem_size) {
+		AFU_MF_PMD_ERR("Test is out of DDR memory space");
+		return -EINVAL;
+	}
+
+	/* host address shifted by buf_offset to exercise unaligned starts */
+	dma_buf_ptr = (uint64_t *)((uint64_t)ctx->data_buf + buf_offset);
+	printf("Sweep Host %p to FPGA 0x%"PRIx64
+		" with 0x%"PRIx64" bytes ...\n",
+		(void *)dma_buf_ptr, ddr_offset, test_size);
+
+	/* only the transfer itself is inside the timed region */
+	for (i = 0; i < SWEEP_ITERS; i++) {
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		ret = dma_transfer_sync(ctx, ddr_offset, (uint64_t)dma_buf_ptr,
+			test_size, HOST_TO_FPGA);
+		clock_gettime(CLOCK_MONOTONIC, &end);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed");
+			return ret;
+		}
+		total_time += getTime(start, end);
+	}
+	/* bytes / (seconds * 1e6) */
+	throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
+	printf("Measured bandwidth = %lf MB/s\n", throughput);
+
+	printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n",
+		ddr_offset, (void *)dma_buf_ptr, test_size);
+
+	total_time = 0.0;
+	/* wipe the host window so the read-back has to restore the data */
+	memset((char *)dma_buf_ptr, 0, test_size);
+	for (i = 0; i < SWEEP_ITERS; i++) {
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		ret = dma_transfer_sync(ctx, (uint64_t)dma_buf_ptr, ddr_offset,
+			test_size, FPGA_TO_HOST);
+		clock_gettime(CLOCK_MONOTONIC, &end);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed");
+			return ret;
+		}
+		total_time += getTime(start, end);
+	}
+	throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
+	printf("Measured bandwidth = %lf MB/s\n", throughput);
+
+	printf("Verifying buffer ...\n");
+	return dma_afu_buf_verify(ctx, test_size);
+}
+
+/*
+ * Run the DMA self test described by priv->dma_cfg on the selected engine.
+ *
+ * Sequence: allocate and initialise the test buffers, enable the engine's
+ * global interrupt, then
+ *   1. host -> FPGA at cfg->offset, FPGA -> host, verify;
+ *   2. FPGA -> FPGA to a second window, read that window back, verify
+ *      (the remaining steps are skipped when no second window fits);
+ *   3. aligned sweep test;
+ *   4. unaligned sweep tests when cfg->unaligned is set.
+ * The interrupt is disabled and the buffers are freed on every exit path
+ * taken after allocation.
+ *
+ * Returns 0 on success, -EINVAL for a NULL dev or an out-of-range engine
+ * index, -ENOENT when dev->priv is NULL, or the first error of a step.
+ */
+static int dma_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	struct rte_pmd_afu_dma_cfg *cfg = NULL;
+	msgdma_ctrl ctrl;
+	uint64_t offset = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->dma_cfg;
+	if (cfg->index >= NUM_N3000_DMA)
+		return -EINVAL;
+	ctx = &priv->dma_ctx[cfg->index];
+
+	ctx->pattern = (int)cfg->pattern;
+	ctx->verbose = (int)cfg->verbose;
+	ctx->dma_buf_size = cfg->size;
+
+	ret = dma_afu_buf_alloc(ctx, cfg);
+	if (ret)
+		goto free;
+
+	printf("Initialize test buffer\n");
+	dma_afu_buf_init(ctx, cfg->length);
+
+	/* enable interrupt */
+	ctrl.csr = 0;
+	ctrl.global_intr_en_mask = 1;
+	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+	printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf,
+		cfg->offset, cfg->length);
+	ret = dma_transfer_sync(ctx, cfg->offset, (uint64_t)ctx->data_buf,
+		cfg->length, HOST_TO_FPGA);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from host to FPGA");
+		goto end;
+	}
+	/* wipe the host buffer so the read-back has to restore the data */
+	memset(ctx->data_buf, 0, cfg->length);
+
+	printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
+		ctx->data_buf, cfg->length);
+	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, cfg->offset,
+		cfg->length, FPGA_TO_HOST);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+		goto end;
+	}
+	ret = dma_afu_buf_verify(ctx, cfg->length);
+	if (ret)
+		goto end;
+
+	/*
+	 * Pick a second FPGA window for the FPGA-to-FPGA copy: directly after
+	 * the first one when it fits, else at address 0 when the first window
+	 * starts above cfg->length. offset and length are 32-bit fields, so
+	 * widen them before adding; the 32-bit sum would wrap for large
+	 * lengths and select a window beyond mem_size.
+	 */
+	if (((uint64_t)cfg->offset + (uint64_t)cfg->length * 2) <=
+		ctx->mem_size)
+		offset = (uint64_t)cfg->offset + cfg->length;
+	else if (cfg->offset > cfg->length)
+		offset = 0;
+	else
+		goto end;
+
+	printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
+		cfg->offset, offset, cfg->length);
+	ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length,
+		FPGA_TO_FPGA);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to FPGA");
+		goto end;
+	}
+
+	printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
+		ctx->data_buf, cfg->length);
+	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, offset,
+		cfg->length, FPGA_TO_HOST);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+		goto end;
+	}
+	ret = dma_afu_buf_verify(ctx, cfg->length);
+	if (ret)
+		goto end;
+
+	printf("Sweep with aligned address and size\n");
+	ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
+	if (ret)
+		goto end;
+
+	if (cfg->unaligned) {
+		printf("Sweep with unaligned address and size\n");
+		/* {host buffer offset, bytes trimmed from the length} pairs */
+		struct unaligned_set {
+			uint64_t addr_offset;
+			uint64_t size_dec;
+		} param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
+		for (i = 0; i < ARRAY_SIZE(param); i++) {
+			ret = sweep_test(ctx, cfg->length, cfg->offset,
+				param[i].addr_offset, param[i].size_dec);
+			if (ret)
+				break;
+		}
+	}
+
+end:
+	/* disable interrupt */
+	ctrl.global_intr_en_mask = 0;
+	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+free:
+	dma_afu_buf_free(ctx);
+	return ret;
+}
+
+/*
+ * Resolve the PCI device behind an AFU raw device.
+ *
+ * Follows dev->rawdev->device to the AFU device, then that AFU device's
+ * rawdev->device to the PCI device. Returns NULL when any link in the chain
+ * is missing.
+ */
+static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_mf_rawdev *dev)
+{
+	struct rte_afu_device *afu = NULL;
+
+	if (dev == NULL)
+		return NULL;
+	if (dev->rawdev == NULL || dev->rawdev->device == NULL)
+		return NULL;
+
+	afu = RTE_DEV_TO_AFU(dev->rawdev->device);
+	if (afu->rawdev == NULL)
+		return NULL;
+	if (afu->rawdev->device == NULL)
+		return NULL;
+
+	return RTE_DEV_TO_PCI(afu->rawdev->device);
+}
+
+/*
+ * Bind 'count' eventfds to consecutive MSI-X vectors starting at vec_start
+ * through the VFIO_DEVICE_SET_IRQS ioctl of the underlying PCI device.
+ *
+ * efds must point to 'count' file descriptors; they are copied into the
+ * ioctl argument, so the array need not outlive the call.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments (count must be within
+ * 1..MAX_MSIX_VEC), -ENODEV when the PCI device or its VFIO fd cannot be
+ * resolved, -ENOMEM on allocation failure, or the ioctl() result.
+ */
+static int dma_afu_set_irqs(struct afu_mf_rawdev *dev, uint32_t vec_start,
+	uint32_t count, int *efds)
+{
+	struct rte_pci_device *pci_dev = NULL;
+	struct vfio_irq_set *irq_set = NULL;
+	int vfio_dev_fd = 0;
+	size_t sz = 0;
+	int ret = 0;
+
+	if (!dev || !efds || (count == 0) || (count > MAX_MSIX_VEC))
+		return -EINVAL;
+
+	pci_dev = n3000_afu_get_pci_dev(dev);
+	if (!pci_dev)
+		return -ENODEV;
+	vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
+	/* fail early instead of issuing the ioctl on an invalid descriptor */
+	if (vfio_dev_fd < 0) {
+		AFU_MF_PMD_ERR("Invalid VFIO device fd");
+		return -ENODEV;
+	}
+
+	/* header followed by one eventfd per vector */
+	sz = sizeof(*irq_set) + sizeof(*efds) * count;
+	irq_set = rte_zmalloc(NULL, sz, 0);
+	if (!irq_set)
+		return -ENOMEM;
+
+	irq_set->argsz = (uint32_t)sz;
+	irq_set->count = count;
+	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+		VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	irq_set->start = vec_start;
+
+	rte_memcpy(&irq_set->data, efds, sizeof(*efds) * count);
+	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+	if (ret)
+		AFU_MF_PMD_ERR("Error enabling MSI-X interrupts");
+
+	rte_free(irq_set);
+	return ret;
+}
+
+/*
+ * Locate the mapped address of the FIU port that dev belongs to.
+ *
+ * The port attribute register for dev->port is read from BAR 0; it tells
+ * whether the port is implemented and in which BAR, at which offset, the
+ * port lives.
+ *
+ * Returns the port address, or NULL when the PCI device cannot be resolved,
+ * the port is not implemented, or the reported BAR index is not below
+ * PCI_MAX_RESOURCE.
+ */
+static void *n3000_afu_get_port_addr(struct afu_mf_rawdev *dev)
+{
+	struct rte_pci_device *pdev = n3000_afu_get_pci_dev(dev);
+	uint8_t *bar0 = NULL;
+	uint64_t attr = 0;
+	uint32_t bar_idx = 0;
+
+	if (pdev == NULL)
+		return NULL;
+
+	bar0 = (uint8_t *)pdev->mem_resource[0].addr;
+	attr = rte_read64(bar0 + PORT_ATTR_REG(dev->port));
+	if (!PORT_IMPLEMENTED(attr)) {
+		AFU_MF_PMD_INFO("FIU port %d is not implemented", dev->port);
+		return NULL;
+	}
+
+	bar_idx = PORT_BAR(attr);
+	if (bar_idx >= PCI_MAX_RESOURCE) {
+		AFU_MF_PMD_ERR("BAR index %u is out of limit", bar_idx);
+		return NULL;
+	}
+
+	return (uint8_t *)pdev->mem_resource[bar_idx].addr + PORT_OFFSET(attr);
+}
+
+/*
+ * Walk the port's feature header list looking for the private feature with
+ * id PORT_FEATURE_UINT_ID and report the values decoded from its capability
+ * register.
+ *
+ * vec_start and vec_count are optional outputs; either may be NULL.
+ * The walk stops at a header flagged end-of-list, or at a header whose next
+ * offset is 0 or has its low 16 bits all set.
+ *
+ * Returns 0 when the feature was found, -ENOENT otherwise (including when
+ * the port address cannot be resolved).
+ */
+static int n3000_afu_get_irq_capability(struct afu_mf_rawdev *dev,
+	uint32_t *vec_start, uint32_t *vec_count)
+{
+	uint8_t *feature = (uint8_t *)n3000_afu_get_port_addr(dev);
+	uint64_t dfh = 0;
+	uint64_t cap = 0;
+	uint64_t step = 0;
+
+	if (feature == NULL)
+		return -ENOENT;
+
+	for (;;) {
+		dfh = rte_read64(feature);
+		if ((DFH_TYPE(dfh) == DFH_TYPE_PRIVATE) &&
+			(DFH_FEATURE_ID(dfh) == PORT_FEATURE_UINT_ID)) {
+			cap = rte_read64(feature + PORT_UINT_CAP_REG);
+			if (vec_start != NULL)
+				*vec_start = PORT_VEC_START(cap);
+			if (vec_count != NULL)
+				*vec_count = PORT_VEC_COUNT(cap);
+			return 0;
+		}
+
+		step = DFH_NEXT_OFFSET(dfh);
+		if ((step == 0) || ((step & 0xffff) == 0xffff))
+			break;
+		if (DFH_EOL(dfh))
+			break;
+		feature += step;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Free the DSM, source and destination buffers of the NLB context and reset
+ * the corresponding pointers (including status_ptr, which points into the
+ * DSM buffer).
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when dev->priv is
+ * NULL.
+ */
+static int nlb_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct nlb_afu_ctx *nlb = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	nlb = &((struct n3000_afu_priv *)dev->priv)->nlb_ctx;
+
+	/* status_ptr aliases the DSM buffer, so it is dropped with it */
+	nlb->status_ptr = NULL;
+	rte_free(nlb->dsm_ptr);
+	nlb->dsm_ptr = NULL;
+
+	rte_free(nlb->src_ptr);
+	nlb->src_ptr = NULL;
+
+	rte_free(nlb->dest_ptr);
+	nlb->dest_ptr = NULL;
+
+	return 0;
+}
+
+/*
+ * Allocate one zeroed, TEST_MEM_ALIGN aligned buffer of 'size' bytes and
+ * look up its IOVA. *ptr is written even on failure so the caller's release
+ * path can free a buffer whose IOVA lookup failed.
+ */
+static int nlb_afu_alloc_buf(size_t size, uint8_t **ptr, uint64_t *iova)
+{
+	*ptr = (uint8_t *)rte_zmalloc(NULL, size, TEST_MEM_ALIGN);
+	if (*ptr == NULL)
+		return -ENOMEM;
+
+	*iova = rte_malloc_virt2iova(*ptr);
+	if (*iova == RTE_BAD_IOVA)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Set up the NLB context of dev: record the AFU address and allocate the
+ * DSM buffer (DSM_SIZE) plus the source and destination buffers
+ * (NLB_BUF_SIZE each). status_ptr is pointed DSM_STATUS bytes into the DSM
+ * buffer.
+ *
+ * Returns 0 on success, -EINVAL on NULL arguments, -ENOENT when dev->priv is
+ * NULL, or -ENOMEM when an allocation or IOVA lookup fails, in which case
+ * everything allocated so far is released.
+ */
+static int nlb_afu_ctx_init(struct afu_mf_rawdev *dev, uint8_t *addr)
+{
+	struct nlb_afu_ctx *nlb = NULL;
+	int rc = 0;
+
+	if (dev == NULL || addr == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	nlb = &((struct n3000_afu_priv *)dev->priv)->nlb_ctx;
+	nlb->addr = addr;
+
+	rc = nlb_afu_alloc_buf(DSM_SIZE, &nlb->dsm_ptr, &nlb->dsm_iova);
+	if (rc == 0)
+		rc = nlb_afu_alloc_buf(NLB_BUF_SIZE, &nlb->src_ptr,
+			&nlb->src_iova);
+	if (rc == 0)
+		rc = nlb_afu_alloc_buf(NLB_BUF_SIZE, &nlb->dest_ptr,
+			&nlb->dest_iova);
+	if (rc != 0) {
+		nlb_afu_ctx_release(dev);
+		return rc;
+	}
+
+	nlb->status_ptr = (struct nlb_dsm_status *)(nlb->dsm_ptr + DSM_STATUS);
+	return 0;
+}
+
+/*
+ * Release the resources of every DMA context of dev.
+ *
+ * dma_afu_ctx_init() allocates a descriptor buffer and a magic buffer per
+ * engine, so all NUM_N3000_DMA contexts are visited here. The eventfd is
+ * created once for engine 0 and shared by the others, so it is closed
+ * exactly once.
+ *
+ * Contexts start out zero-filled, so an event_fd of 0 is treated as "never
+ * opened" and is not closed; after release every context holds -1, which
+ * makes a repeated call harmless.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when dev->priv is
+ * NULL.
+ */
+static int dma_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	int event_fd = 0;
+	int i = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	/* remember the shared descriptor before the contexts are reset */
+	event_fd = priv->dma_ctx[0].event_fd;
+
+	for (i = 0; i < NUM_N3000_DMA; i++) {
+		ctx = &priv->dma_ctx[i];
+
+		rte_free(ctx->desc_buf);
+		ctx->desc_buf = NULL;
+
+		rte_free(ctx->magic_buf);
+		ctx->magic_buf = NULL;
+
+		ctx->event_fd = -1;
+	}
+
+	if (event_fd > 0)
+		close(event_fd);
+	return 0;
+}
+
+/*
+ * Initialise DMA context 'index' of dev for the engine mapped at addr.
+ *
+ * Register addresses are derived from addr, the memory size is taken from a
+ * per-engine table, and a descriptor buffer and a magic buffer are
+ * allocated. Engine 0 additionally creates the eventfd and binds it to the
+ * first MSI-X vector reported by the port; the other engines reuse the
+ * eventfd stored in context 0 of the same device, so no state is shared
+ * between devices.
+ *
+ * A failure to bind the interrupt is only logged; initialisation continues.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments, -ENOENT when dev->priv is
+ * NULL, -EBADF when the eventfd cannot be created, -ENOMEM on allocation or
+ * IOVA lookup failure, or the error of n3000_afu_get_irq_capability().
+ */
+static int dma_afu_ctx_init(struct afu_mf_rawdev *dev, int index, uint8_t *addr)
+{
+	static const uint64_t mem_sz[] = {
+		0x100000000, 0x100000000, 0x40000000, 0x1000000
+	};
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	uint32_t vec_start = 0;
+	int efd = -1;
+	int ret = 0;
+
+	if (!dev || (index < 0) || (index >= NUM_N3000_DMA) || !addr)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	ctx = &priv->dma_ctx[index];
+	ctx->index = index;
+	ctx->addr = addr;
+	ctx->csr_addr = addr + DMA_CSR;
+	ctx->desc_addr = addr + DMA_DESC;
+	ctx->ase_ctrl_addr = addr + DMA_ASE_CTRL;
+	ctx->ase_data_addr = addr + DMA_ASE_DATA;
+	ctx->mem_size = mem_sz[ctx->index];
+	ctx->cur_ase_page = INVALID_ASE_PAGE;
+	if (ctx->index == 0) {
+		ret = n3000_afu_get_irq_capability(dev, &vec_start, NULL);
+		if (ret)
+			return ret;
+
+		efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+		if (efd < 0) {
+			AFU_MF_PMD_ERR("eventfd create failed");
+			return -EBADF;
+		}
+
+		/* the descriptor is copied by the callee, a local is enough */
+		if (dma_afu_set_irqs(dev, vec_start, 1, &efd))
+			AFU_MF_PMD_ERR("DMA interrupt setup failed");
+		ctx->event_fd = efd;
+	} else {
+		/* all engines of one device share the eventfd of engine 0 */
+		ctx->event_fd = priv->dma_ctx[0].event_fd;
+	}
+
+	ctx->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
+		sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
+	if (!ctx->desc_buf) {
+		ret = -ENOMEM;
+		goto release;
+	}
+
+	ctx->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (!ctx->magic_buf) {
+		ret = -ENOMEM;
+		goto release;
+	}
+	ctx->magic_iova = rte_malloc_virt2iova(ctx->magic_buf);
+	if (ctx->magic_iova == RTE_BAD_IOVA) {
+		ret = -ENOMEM;
+		goto release;
+	}
+
+	return 0;
+
+release:
+	/* drop this context's own buffers, then the device-wide resources */
+	rte_free(ctx->desc_buf);
+	ctx->desc_buf = NULL;
+	rte_free(ctx->magic_buf);
+	ctx->magic_buf = NULL;
+	dma_afu_ctx_release(dev);
+	return ret;
+}
+
+/*
+ * Walk the feature header list starting at dev->addr and initialise a
+ * context for every recognised block:
+ *  - an AFU-type header carrying the NLB0 UUID -> nlb_afu_ctx_init();
+ *  - a BBB-type header carrying the DMA UUID   -> dma_afu_ctx_init(), for at
+ *    most NUM_N3000_DMA engines, numbered in discovery order.
+ * Other headers are only logged at debug level.
+ *
+ * The walk stops at a header flagged end-of-list, or at a header whose next
+ * offset is 0 or has its low 16 bits all set.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when dev->priv is
+ * NULL, or the error of the failing init helper. Contexts initialised before
+ * a failure are not released here.
+ */
+static int n3000_afu_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	uint8_t *addr = NULL;
+	uint64_t header = 0;
+	uint64_t uuid_hi = 0;
+	uint64_t uuid_lo = 0;
+	uint64_t next_offset = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	addr = (uint8_t *)dev->addr;
+	do {
+		/* next_offset is 0 on the first pass, so addr is unchanged */
+		addr += next_offset;
+		header = rte_read64(addr);
+		uuid_lo = rte_read64(addr + DFH_UUID_L_OFFSET);
+		uuid_hi = rte_read64(addr + DFH_UUID_H_OFFSET);
+
+		if ((DFH_TYPE(header) == DFH_TYPE_AFU) &&
+			(uuid_lo == N3000_NLB0_UUID_L) &&
+			(uuid_hi == N3000_NLB0_UUID_H)) {
+			AFU_MF_PMD_INFO("AFU NLB0 found @ %p", (void *)addr);
+			ret = nlb_afu_ctx_init(dev, addr);
+			if (ret)
+				return ret;
+		} else if ((DFH_TYPE(header) == DFH_TYPE_BBB) &&
+			(uuid_lo == N3000_DMA_UUID_L) &&
+			(uuid_hi == N3000_DMA_UUID_H) &&
+			(priv->num_dma < NUM_N3000_DMA)) {
+			AFU_MF_PMD_INFO("AFU DMA%d found @ %p",
+				priv->num_dma, (void *)addr);
+			ret = dma_afu_ctx_init(dev, priv->num_dma, addr);
+			if (ret)
+				return ret;
+			/* count only engines that initialised successfully */
+			priv->num_dma++;
+		} else {
+			AFU_MF_PMD_DEBUG("DFH: type %"PRIu64
+				", uuid %016"PRIx64"%016"PRIx64,
+				DFH_TYPE(header), uuid_hi, uuid_lo);
+		}
+
+		next_offset = DFH_NEXT_OFFSET(header);
+		if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
+			break;
+	} while (!DFH_EOL(header));
+
+	return 0;
+}
+
+/*
+ * Driver init hook: make sure dev->priv holds a zeroed n3000_afu_priv, then
+ * discover and initialise the AFU contexts.
+ *
+ * Returns -EINVAL when dev is NULL, -ENOMEM when the private data cannot be
+ * allocated, otherwise the result of n3000_afu_ctx_init().
+ */
+static int n3000_afu_init(struct afu_mf_rawdev *dev)
+{
+	void *priv = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = dev->priv;
+	if (priv == NULL) {
+		priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0);
+		if (priv == NULL)
+			return -ENOMEM;
+		dev->priv = priv;
+	}
+
+	return n3000_afu_ctx_init(dev);
+}
+
+/*
+ * Driver config hook: validate a struct rte_pmd_afu_n3000_cfg and store the
+ * NLB or DMA part of it in the private data for a later test run.
+ *
+ * NLB: only NLB_MODE_LPBK is accepted; the virtual channel, cache hint,
+ * cache policy, multi_cl (1, 2 or 4) and begin/end cache line values are
+ * range checked.
+ * DMA: the engine index must be below NUM_N3000_DMA and the window
+ * [offset, offset + length) must lie inside that engine's memory. For engine
+ * 3 the length is rounded down to a multiple of 64 bytes, which also
+ * modifies the caller's structure.
+ *
+ * Returns 0 on success, -EINVAL for NULL/zero arguments, a wrong config_size
+ * or any rejected value, -ENOENT when dev->priv is NULL.
+ */
+static int n3000_afu_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_n3000_cfg *cfg = NULL;
+	int i = 0;
+	uint64_t top = 0;
+
+	if (!dev || !config || !config_size)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
+		return -EINVAL;
+
+	cfg = (struct rte_pmd_afu_n3000_cfg *)config;
+	if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
+		if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
+			return -EINVAL;
+		if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
+			(cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
+			return -EINVAL;
+		if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
+			return -EINVAL;
+		if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
+			return -EINVAL;
+		if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
+			return -EINVAL;
+		if ((cfg->nlb_cfg.multi_cl != 1) &&
+			(cfg->nlb_cfg.multi_cl != 2) &&
+			(cfg->nlb_cfg.multi_cl != 4))
+			return -EINVAL;
+		if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
+			(cfg->nlb_cfg.begin > MAX_CACHE_LINES))
+			return -EINVAL;
+		if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
+			(cfg->nlb_cfg.end > MAX_CACHE_LINES))
+			return -EINVAL;
+		rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
+			sizeof(struct rte_pmd_afu_nlb_cfg));
+	} else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
+		if (cfg->dma_cfg.index >= NUM_N3000_DMA)
+			return -EINVAL;
+		i = cfg->dma_cfg.index;
+		if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
+			return -EINVAL;
+		if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
+			return -EINVAL;
+		/*
+		 * length and offset are 32-bit fields; widen before adding so
+		 * that a sum above 4 GiB cannot wrap to a small value and slip
+		 * through the range check.
+		 */
+		top = (uint64_t)cfg->dma_cfg.length + cfg->dma_cfg.offset;
+		if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
+			return -EINVAL;
+		if (i == 3) {  /* QDR connected to DMA3 */
+			if (cfg->dma_cfg.length & 0x3f) {
+				cfg->dma_cfg.length &= ~0x3f;
+				AFU_MF_PMD_INFO("Round size to %x for QDR",
+					cfg->dma_cfg.length);
+			}
+		}
+		rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
+			sizeof(struct rte_pmd_afu_dma_cfg));
+	} else {
+		AFU_MF_PMD_ERR("Invalid type of N3000 AFU");
+		return -EINVAL;
+	}
+
+	priv->cfg_type = cfg->type;
+	return 0;
+}
+
+/*
+ * Driver test hook: run the self test that matches the most recently stored
+ * configuration type.
+ *
+ * Returns -EINVAL when dev is NULL or no NLB/DMA configuration has been
+ * stored, -ENOENT when dev->priv is NULL, otherwise the result of
+ * nlb_afu_test() or dma_afu_test().
+ */
+static int n3000_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+
+	switch (priv->cfg_type) {
+	case RTE_PMD_AFU_N3000_NLB:
+		AFU_MF_PMD_INFO("Test NLB");
+		return nlb_afu_test(dev);
+	case RTE_PMD_AFU_N3000_DMA:
+		AFU_MF_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
+		return dma_afu_test(dev);
+	default:
+		AFU_MF_PMD_ERR("Please configure AFU before test");
+		return -EINVAL;
+	}
+}
+
+/*
+ * Driver close hook: release the NLB and DMA contexts, then free the private
+ * data and clear dev->priv. The results of the two release helpers are not
+ * propagated.
+ *
+ * Returns 0, or -EINVAL when dev is NULL.
+ */
+static int n3000_afu_close(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	(void)nlb_afu_ctx_release(dev);
+	(void)dma_afu_ctx_release(dev);
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+
+	return 0;
+}
+
+/*
+ * Driver dump hook: print the addresses held by the context that matches the
+ * stored configuration type. Output goes to f, or to stdout when f is NULL.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL or no NLB/DMA configuration
+ * has been stored, -ENOENT when dev->priv is NULL.
+ */
+static int n3000_afu_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct nlb_afu_ctx *nlb = NULL;
+	struct dma_afu_ctx *dma = NULL;
+	FILE *out = f;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+	priv = (struct n3000_afu_priv *)dev->priv;
+
+	if (out == NULL)
+		out = stdout;
+
+	switch (priv->cfg_type) {
+	case RTE_PMD_AFU_N3000_NLB:
+		nlb = &priv->nlb_ctx;
+		fprintf(out, "addr:\t\t%p\n", (void *)nlb->addr);
+		fprintf(out, "dsm_ptr:\t%p\n", (void *)nlb->dsm_ptr);
+		fprintf(out, "dsm_iova:\t%p\n", (void *)nlb->dsm_iova);
+		fprintf(out, "src_ptr:\t%p\n", (void *)nlb->src_ptr);
+		fprintf(out, "src_iova:\t%p\n", (void *)nlb->src_iova);
+		fprintf(out, "dest_ptr:\t%p\n", (void *)nlb->dest_ptr);
+		fprintf(out, "dest_iova:\t%p\n", (void *)nlb->dest_iova);
+		fprintf(out, "status_ptr:\t%p\n", (void *)nlb->status_ptr);
+		break;
+	case RTE_PMD_AFU_N3000_DMA:
+		dma = &priv->dma_ctx[priv->dma_cfg.index];
+		fprintf(out, "index:\t\t%d\n", dma->index);
+		fprintf(out, "addr:\t\t%p\n", (void *)dma->addr);
+		fprintf(out, "csr_addr:\t%p\n", (void *)dma->csr_addr);
+		fprintf(out, "desc_addr:\t%p\n", (void *)dma->desc_addr);
+		fprintf(out, "ase_ctrl_addr:\t%p\n", (void *)dma->ase_ctrl_addr);
+		fprintf(out, "ase_data_addr:\t%p\n", (void *)dma->ase_data_addr);
+		fprintf(out, "desc_buf:\t%p\n", (void *)dma->desc_buf);
+		fprintf(out, "magic_buf:\t%p\n", (void *)dma->magic_buf);
+		fprintf(out, "magic_iova:\t%p\n", (void *)dma->magic_iova);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Driver reset hook: pulse the soft-reset bit of the port control register.
+ * The bit is set, held for 100 microseconds, then cleared again.
+ *
+ * Returns 0 on success, -ENOENT when the port address cannot be resolved.
+ */
+static int n3000_afu_reset(struct afu_mf_rawdev *dev)
+{
+	uint8_t *port = (uint8_t *)n3000_afu_get_port_addr(dev);
+	uint8_t *ctrl_reg = NULL;
+	uint64_t ctrl = 0;
+
+	if (port == NULL)
+		return -ENOENT;
+
+	ctrl_reg = port + PORT_CTRL_REG;
+
+	ctrl = rte_read64(ctrl_reg) | PORT_SOFT_RESET;
+	rte_write64(ctrl, ctrl_reg);
+	rte_delay_us(100);
+	rte_write64(ctrl & ~PORT_SOFT_RESET, ctrl_reg);
+
+	return 0;
+}
+
+/* Operation table of the N3000 AFU driver; start and stop are not provided. */
+static struct afu_mf_ops n3000_afu_ops = {
+	.init = n3000_afu_init,
+	.config = n3000_afu_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = n3000_afu_test,
+	.close = n3000_afu_close,
+	.dump = n3000_afu_dump,
+	.reset = n3000_afu_reset
+};
+
+/* Driver descriptor pairing the N3000 AFU UUID (low, high) with its ops. */
+struct afu_mf_drv n3000_afu_drv = {
+	.uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+	.ops = &n3000_afu_ops
+};
diff --git a/drivers/raw/afu_mf/n3000_afu.h b/drivers/raw/afu_mf/n3000_afu.h
new file mode 100644
index 0000000..38104ac
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _N3000_AFU_H_
+#define _N3000_AFU_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/* UUIDs, split into low and high 64-bit halves: driver match, NLB0 and DMA */
+#define N3000_AFU_UUID_L  0xc000c9660d824272
+#define N3000_AFU_UUID_H  0x9aeffe5f84570612
+#define N3000_NLB0_UUID_L 0xf89e433683f9040b
+#define N3000_NLB0_UUID_H 0xd8424dc4a4a3c413
+#define N3000_DMA_UUID_L  0xa9149a35bace01ea
+#define N3000_DMA_UUID_H  0xef82def7f6ec40fc
+
+/* driver descriptor defined in n3000_afu.c */
+extern struct afu_mf_drv n3000_afu_drv;
+
+/* number of DMA contexts kept per device */
+#define NUM_N3000_DMA  4
+/* upper bound on the vector count accepted when binding MSI-X eventfds */
+#define MAX_MSIX_VEC   7
+
+/* N3000 DFL definition */
+/* byte offsets of the UUID halves relative to a feature header */
+#define DFH_UUID_L_OFFSET  8
+#define DFH_UUID_H_OFFSET  16
+/* feature header fields: type [63:60], EOL [40], next offset [39:16], id [11:0] */
+#define DFH_TYPE(hdr)  (((hdr) >> 60) & 0xf)
+#define DFH_TYPE_AFU  1
+#define DFH_TYPE_BBB  2
+#define DFH_TYPE_PRIVATE  3
+#define DFH_EOL(hdr)  (((hdr) >> 40) & 0x1)
+#define DFH_NEXT_OFFSET(hdr)  (((hdr) >> 16) & 0xffffff)
+#define DFH_FEATURE_ID(hdr)  ((hdr) & 0xfff)
+/* port attribute register of port n and its fields */
+#define PORT_ATTR_REG(n)  (((n) << 3) + 0x38)
+#define PORT_IMPLEMENTED(attr)  (((attr) >> 60) & 0x1)
+#define PORT_BAR(attr)  (((attr) >> 32) & 0x7)
+#define PORT_OFFSET(attr)  ((attr) & 0xffffff)
+/* port user interrupt feature and its capability register */
+#define PORT_FEATURE_UINT_ID  0x12
+#define PORT_UINT_CAP_REG  0x8
+/*
+ * Capability register fields: first vector in bits [23:12], number of
+ * vectors in bits [11:0]. The count must not reuse the shift of the start
+ * field, otherwise both macros return the same value.
+ */
+#define PORT_VEC_START(cap)  (((cap) >> 12) & 0xfff)
+#define PORT_VEC_COUNT(cap)  ((cap) & 0xfff)
+/* port control register and its soft reset bit */
+#define PORT_CTRL_REG  0x38
+#define PORT_SOFT_RESET  (0x1 << 0)
+
+/* NLB registers definition */
+/* NOTE(review): values look like byte offsets from the NLB AFU base - confirm */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+/* size of the DSM buffer and offset of struct nlb_dsm_status inside it */
+#define DSM_SIZE           0x200000
+#define DSM_STATUS         0x40
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+/* size of the NLB source and destination buffers */
+#define NLB_BUF_SIZE  0x400000
+/* alignment requested for the NLB buffers and the DMA magic buffer */
+#define TEST_MEM_ALIGN  1024
+
+/*
+ * 32-bit control value: 'csr' is the raw word, the anonymous struct names
+ * its bits (presumably the layout of the CSR_CTL register - confirm).
+ */
+struct nlb_csr_ctl {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t reset:1;
+			uint32_t start:1;
+			uint32_t force_completion:1;
+			uint32_t reserved:29;
+		};
+	};
+};
+
+/*
+ * 32-bit configuration value: 'csr' is the raw word, the anonymous struct
+ * names its bits (presumably the layout of the CSR_CFG register - confirm).
+ * The field widths add up to 32 bits.
+ */
+struct nlb_csr_cfg {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t wrthru_en:1;
+			uint32_t cont:1;
+			uint32_t mode:3;
+			uint32_t multicl_len:2;
+			uint32_t rsvd1:1;
+			uint32_t delay_en:1;
+			uint32_t rdsel:2;
+			uint32_t rsvd2:1;
+			uint32_t chsel:3;
+			uint32_t rsvd3:1;
+			uint32_t wrpush_i:1;
+			uint32_t wr_chsel:3;
+			uint32_t rsvd4:3;
+			uint32_t test_cfg:5;
+			uint32_t interrupt_on_error:1;
+			uint32_t interrupt_testmode:1;
+			uint32_t wrfence_chsel:2;
+		};
+	};
+};
+
+/*
+ * 64-bit status value split into two 32-bit counters (presumably the layout
+ * of the CSR_STATUS0 register - confirm).
+ */
+struct nlb_status0 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_writes;
+			uint32_t num_reads;
+		};
+	};
+};
+
+/*
+ * 64-bit status value split into two 32-bit pending-request counters
+ * (presumably the layout of the CSR_STATUS1 register - confirm).
+ */
+struct nlb_status1 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_pend_writes;
+			uint32_t num_pend_reads;
+		};
+	};
+};
+
+/*
+ * Status record located DSM_STATUS bytes into the DSM buffer; the NLB
+ * context's status_ptr points at it.
+ */
+struct nlb_dsm_status {
+	uint32_t test_complete;
+	uint32_t test_error;
+	uint64_t num_clocks;
+	uint32_t num_reads;
+	uint32_t num_writes;
+	uint32_t start_overhead;
+	uint32_t end_overhead;
+};
+
+/* DMA registers definition */
+/* byte offsets from the DMA block base address */
+#define DMA_CSR       0x40
+#define DMA_DESC      0x60
+#define DMA_ASE_CTRL  0x200
+#define DMA_ASE_DATA  0x1000
+
+/* ASE data window size in bytes, its offset mask, and the "no page" marker */
+#define DMA_ASE_WINDOW       4096
+#define DMA_ASE_WINDOW_MASK  ((uint64_t)(DMA_ASE_WINDOW - 1))
+#define INVALID_ASE_PAGE     0xffffffffffffffffULL
+
+/* value polled for in the magic buffer, and the address it is read from */
+#define DMA_WF_MAGIC             0x5772745F53796E63ULL
+#define DMA_WF_MAGIC_ROM         0x1000000000000
+/* tag a host IOVA for use in a descriptor (data buffer / magic buffer) */
+#define DMA_HOST_ADDR(addr)      ((addr) | 0x2000000000000)
+#define DMA_WF_HOST_ADDR(addr)   ((addr) | 0x3000000000000)
+
+/* number of bounce buffers per DMA context; they are drained in halves */
+#define NUM_DMA_BUF   8
+#define HALF_DMA_BUF  (NUM_DMA_BUF / 2)
+
+#define DMA_MASK_32_BIT 0xFFFFFFFF
+
+#define DMA_CSR_BUSY           0x1
+#define DMA_DESC_BUFFER_EMPTY  0x2
+#define DMA_DESC_BUFFER_FULL   0x4
+
+#define DWORD_BYTES 4
+#define IS_ALIGNED_DWORD(addr) (((addr) % DWORD_BYTES) == 0)
+
+#define QWORD_BYTES 8
+#define IS_ALIGNED_QWORD(addr) (((addr) % QWORD_BYTES) == 0)
+
+/* addresses and lengths handed to the DMA engine are multiples of this */
+#define DMA_ALIGN_BYTES 64
+#define IS_DMA_ALIGNED(addr) (((addr) % DMA_ALIGN_BYTES) == 0)
+
+#define CCIP_ALIGN_BYTES (DMA_ALIGN_BYTES << 2)
+
+#define DMA_TIMEOUT_MSEC  5000
+
+/* size in bytes of the buffer that receives the magic word */
+#define MAGIC_BUF_SIZE  64
+#define ERR_CHECK_LIMIT  64
+
+/* note: both arguments may be evaluated twice */
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+/* element count of a true array (not of a pointer) */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* Direction of a DMA transfer; FPGA_MAX_TRANSFER_TYPE counts the directions. */
+typedef enum {
+	HOST_TO_FPGA = 0,
+	FPGA_TO_HOST,
+	FPGA_TO_FPGA,
+	FPGA_MAX_TRANSFER_TYPE,
+} fpga_dma_type;
+
+/*
+ * Control word of a DMA descriptor: 'csr' is the raw 32-bit value, the
+ * anonymous struct names its bits. The field widths add up to 32 bits.
+ */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t tx_channel:8;
+		uint32_t generate_sop:1;
+		uint32_t generate_eop:1;
+		uint32_t park_reads:1;
+		uint32_t park_writes:1;
+		uint32_t end_on_eop:1;
+		uint32_t reserved_1:1;
+		uint32_t transfer_irq_en:1;
+		uint32_t early_term_irq_en:1;
+		uint32_t trans_error_irq_en:8;
+		uint32_t early_done_en:1;
+		uint32_t reserved_2:6;
+		uint32_t go:1;
+	};
+} msgdma_desc_ctrl;
+
+/*
+ * Extended DMA descriptor (packed). The read and write addresses are each
+ * split into a 32-bit field and a 32-bit *_ext field
+ * (NOTE(review): presumably low and high halves of a 64-bit address -
+ * confirm); the control word comes last.
+ */
+typedef struct __rte_packed {
+	uint32_t rd_address;
+	uint32_t wr_address;
+	uint32_t len;
+	uint16_t seq_num;
+	uint8_t rd_burst_count;
+	uint8_t wr_burst_count;
+	uint16_t rd_stride;
+	uint16_t wr_stride;
+	uint32_t rd_address_ext;
+	uint32_t wr_address_ext;
+	msgdma_desc_ctrl control;
+} msgdma_ext_desc;
+
+/*
+ * DMA status word: 'csr' is the raw 32-bit value, the anonymous struct names
+ * its bits. The field widths add up to 32 bits. The spelling of
+ * 'stopped_on_errror' is kept as is because it is part of the type's
+ * interface.
+ */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t busy:1;
+		uint32_t desc_buf_empty:1;
+		uint32_t desc_buf_full:1;
+		uint32_t rsp_buf_empty:1;
+		uint32_t rsp_buf_full:1;
+		uint32_t stopped:1;
+		uint32_t resetting:1;
+		uint32_t stopped_on_errror:1;
+		uint32_t stopped_on_early_term:1;
+		uint32_t irq:1;
+		uint32_t reserved:22;
+	};
+} msgdma_status;
+
+/*
+ * DMA control word: 'csr' is the raw 32-bit value, the anonymous struct
+ * names its bits. Six single-bit controls occupy bits [5:0]; the reserved
+ * field covers the remaining 26 bits so that the bit-field view describes
+ * the whole 32-bit word.
+ */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t stop_dispatcher:1;
+		uint32_t reset_dispatcher:1;
+		uint32_t stop_on_error:1;
+		uint32_t stopped_on_early_term:1;
+		uint32_t global_intr_en_mask:1;
+		uint32_t stop_descriptors:1;
+		uint32_t reserved:26;
+	};
+} msgdma_ctrl;
+
+/* 32-bit word holding a 16-bit read and a 16-bit write fill level. */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rd_fill_level:16;
+		uint32_t wr_fill_level:16;
+	};
+} msgdma_fill_level;
+
+/* 32-bit word holding a 16-bit response fill level; the rest is reserved. */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rsp_fill_level:16;
+		uint32_t reserved:16;
+	};
+} msgdma_rsp_level;
+
+/* 32-bit word holding a 16-bit read and a 16-bit write sequence number. */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rd_seq_num:16;
+		uint32_t wr_seq_num:16;
+	};
+} msgdma_seq_num;
+
+/*
+ * Packed layout of the DMA CSR block: five consecutive 32-bit words, in the
+ * order status, control, fill level, response level, sequence number.
+ */
+typedef struct __rte_packed {
+	msgdma_status status;
+	msgdma_ctrl ctrl;
+	msgdma_fill_level fill_level;
+	msgdma_rsp_level rsp;
+	msgdma_seq_num seq_num;
+} msgdma_csr;
+
+/* address of the status / control word inside a CSR block located at 'csr' */
+#define CSR_STATUS(csr)   (&(((msgdma_csr *)(csr))->status))
+#define CSR_CONTROL(csr)  (&(((msgdma_csr *)(csr))->ctrl))
+
+/* Runtime state of the NLB AFU: its address plus the three test buffers. */
+struct nlb_afu_ctx {
+	uint8_t *addr;		/* address at which the NLB AFU was found */
+	uint8_t *dsm_ptr;	/* DSM buffer, DSM_SIZE bytes */
+	uint64_t dsm_iova;	/* IOVA of dsm_ptr */
+	uint8_t *src_ptr;	/* source buffer, NLB_BUF_SIZE bytes */
+	uint64_t src_iova;	/* IOVA of src_ptr */
+	uint8_t *dest_ptr;	/* destination buffer, NLB_BUF_SIZE bytes */
+	uint64_t dest_iova;	/* IOVA of dest_ptr */
+	struct nlb_dsm_status *status_ptr;	/* dsm_ptr + DSM_STATUS */
+};
+
+/* Runtime state of one DMA engine. */
+struct dma_afu_ctx {
+	int index;		/* engine number, 0 .. NUM_N3000_DMA - 1 */
+	uint8_t *addr;		/* address at which the DMA block was found */
+	uint8_t *csr_addr;	/* addr + DMA_CSR */
+	uint8_t *desc_addr;	/* addr + DMA_DESC */
+	uint8_t *ase_ctrl_addr;	/* addr + DMA_ASE_CTRL */
+	uint8_t *ase_data_addr;	/* addr + DMA_ASE_DATA */
+	uint64_t mem_size;	/* size of the memory behind this engine */
+	uint64_t cur_ase_page;	/* starts as INVALID_ASE_PAGE */
+	int event_fd;		/* eventfd bound to the DMA interrupt */
+	int verbose;		/* copied from the DMA test configuration */
+	int pattern;		/* copied from the DMA test configuration */
+	void *data_buf;		/* host buffer transferred by the test */
+	void *ref_buf;		/* NOTE(review): presumably reference copy used
+				 * for verification - confirm
+				 */
+	msgdma_ext_desc *desc_buf;	/* one descriptor, DMA_ALIGN_BYTES aligned */
+	uint64_t *magic_buf;	/* MAGIC_BUF_SIZE bytes, receives DMA_WF_MAGIC */
+	uint64_t magic_iova;	/* IOVA of magic_buf */
+	uint32_t dma_buf_size;	/* size of each bounce buffer in bytes */
+	uint64_t *dma_buf[NUM_DMA_BUF];	/* bounce buffers */
+	uint64_t dma_iova[NUM_DMA_BUF];	/* IOVA of each bounce buffer */
+};
+
+/* Private data of one N3000 AFU raw device. */
+struct n3000_afu_priv {
+	struct rte_pmd_afu_nlb_cfg nlb_cfg;	/* last accepted NLB configuration */
+	struct rte_pmd_afu_dma_cfg dma_cfg;	/* last accepted DMA configuration */
+	struct nlb_afu_ctx nlb_ctx;
+	struct dma_afu_ctx dma_ctx[NUM_N3000_DMA];
+	int num_dma;	/* number of DMA engines initialised so far */
+	int cfg_type;	/* RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA */
+};
+
+#endif /* _N3000_AFU_H_ */
diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h
new file mode 100644
index 0000000..89d866a
--- /dev/null
+++ b/drivers/raw/afu_mf/rte_pmd_afu.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __RTE_PMD_AFU_H__
+#define __RTE_PMD_AFU_H__
+
+/**
+ * @file rte_pmd_afu.h
+ *
+ * AFU PMD specific definitions.
+ *
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define RTE_PMD_AFU_N3000_NLB   1  /* rte_pmd_afu_n3000_cfg.type value for an NLB configuration */
+#define RTE_PMD_AFU_N3000_DMA   2  /* rte_pmd_afu_n3000_cfg.type value for a DMA configuration */
+
+#define NLB_MODE_LPBK      0  /* NOTE(review): NLB_MODE_* are presumably rte_pmd_afu_nlb_cfg.mode values - confirm */
+#define NLB_MODE_READ      1
+#define NLB_MODE_WRITE     2
+#define NLB_MODE_TRPUT     3
+
+#define NLB_VC_AUTO        0  /* NOTE(review): NLB_VC_* are presumably values for the *_vc fields of rte_pmd_afu_nlb_cfg - confirm */
+#define NLB_VC_VL0         1
+#define NLB_VC_VH0         2
+#define NLB_VC_VH1         3
+#define NLB_VC_RANDOM      4
+
+#define NLB_WRLINE_M       0  /* NOTE(review): presumably rte_pmd_afu_nlb_cfg.cache_policy values - confirm */
+#define NLB_WRLINE_I       1
+#define NLB_WRPUSH_I       2
+
+#define NLB_RDLINE_S       0  /* NOTE(review): presumably rte_pmd_afu_nlb_cfg.cache_hint values - confirm */
+#define NLB_RDLINE_I       1
+#define NLB_RDLINE_MIXED   2
+
+#define MIN_CACHE_LINES   1
+#define MAX_CACHE_LINES   1024
+
+#define MIN_DMA_BUF_SIZE  64
+#define MAX_DMA_BUF_SIZE  (1023 * 1024)  /* 1047552; NOTE(review): presumably bounds rte_pmd_afu_dma_cfg.size - confirm */
+
+/**
+ * NLB AFU configuration data structure.
+ */
+struct rte_pmd_afu_nlb_cfg {
+	uint32_t mode;  /* NOTE(review): presumably one of NLB_MODE_* - confirm */
+	uint32_t begin;
+	uint32_t end;
+	uint32_t multi_cl;
+	uint32_t cont;
+	uint32_t timeout;
+	uint32_t cache_policy;  /* NOTE(review): presumably NLB_WRLINE_M, NLB_WRLINE_I or NLB_WRPUSH_I - confirm */
+	uint32_t cache_hint;  /* NOTE(review): presumably one of NLB_RDLINE_* - confirm */
+	uint32_t read_vc;  /* NOTE(review): the three *_vc fields presumably take NLB_VC_* values - confirm */
+	uint32_t write_vc;
+	uint32_t wrfence_vc;
+	uint32_t freq_mhz;  /* frequency in MHz, per the field name */
+};
+
+/**
+ * DMA AFU configuration data structure.
+ */
+struct rte_pmd_afu_dma_cfg {
+	uint32_t index;     /* index of DMA controller; NOTE(review): presumably < NUM_N3000_DMA - confirm */
+	uint32_t length;    /* total length of data to DMA */
+	uint32_t offset;    /* address offset of target memory */
+	uint32_t size;      /* size of transfer buffer */
+	uint32_t pattern;   /* data pattern to fill in test buffer */
+	uint32_t unaligned; /* use unaligned address or length in sweep test */
+	uint32_t verbose;   /* enable verbose error information in test */
+};
+
+/**
+ * N3000 AFU configuration data structure.
+ */
+struct rte_pmd_afu_n3000_cfg {
+	int type;   /* RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA */
+	union {  /* anonymous: nlb_cfg and dma_cfg share storage */
+		struct rte_pmd_afu_nlb_cfg nlb_cfg;
+		struct rte_pmd_afu_dma_cfg dma_cfg;
+	};
+};
+
+/**
+ * HE-LBK & HE-MEM-LBK AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_lbk_cfg {
+	uint32_t mode;  /* copied into the mode field of CSR_CFG by he_lbk_afu_config() */
+	uint32_t begin;
+	uint32_t end;
+	uint32_t multi_cl;  /* 4 is encoded as 2 in CSR_CFG, any other value as multi_cl - 1 */
+	uint32_t cont;  /* non-zero sets the cont bit of CSR_CFG */
+	uint32_t timeout;
+	uint32_t trput_interleave;  /* copied into the trput_interleave field of CSR_CFG */
+	uint32_t freq_mhz;  /* frequency in MHz, per the field name */
+};
+
+/**
+ * HE-MEM-TG AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_mem_tg_cfg {
+	uint32_t channel_mask;   /* bit mask of traffic generator channels */
+};
+
+/**
+ * HE-HSSI AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_hssi_cfg {
+	uint32_t port;  /* channel to select; he_hssi_config() rejects values >= NUM_HE_HSSI_PORTS */
+	uint32_t timeout;  /* bounds the 1-second polls for completion in he_hssi_test() */
+	uint32_t num_packets;  /* written to TG_NUM_PKT; polling stops when TG_PKT_XFRD equals it */
+	uint32_t random_length;  /* non-zero writes 1 to TG_PKT_LEN_TYPE, zero writes 0 */
+	uint32_t packet_length;  /* written to TG_PKT_LEN */
+	uint32_t random_payload;  /* non-zero writes 1 to TG_DATA_PATTERN, zero writes 0 */
+	uint32_t rnd_seed[3];  /* written to TG_RANDOM_SEED(0..2) */
+	uint64_t src_addr;  /* low 32 bits go to TG_SRC_MAC_L, bits 32..47 to TG_SRC_MAC_H */
+	uint64_t dest_addr;  /* low 32 bits go to TG_DST_MAC_L, bits 32..47 to TG_DST_MAC_H */
+	int he_loopback;  /* >= 0: only program LOOPBACK_EN (non-zero enables) and return; < 0: run the traffic test */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PMD_AFU_H__ */
diff --git a/drivers/raw/afu_mf/version.map b/drivers/raw/afu_mf/version.map
new file mode 100644
index 0000000..c2e0723
--- /dev/null
+++ b/drivers/raw/afu_mf/version.map
@@ -0,0 +1,3 @@
+DPDK_22 {
+	local: *;
+};
diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build
index 05e7de1..c3627f7 100644
--- a/drivers/raw/meson.build
+++ b/drivers/raw/meson.build
@@ -6,6 +6,7 @@ if is_windows
 endif
 
 drivers = [
+        'afu_mf',
         'cnxk_bphy',
         'cnxk_gpio',
         'dpaa2_cmdif',
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2] raw/afu_mf: introduce AFU MF device driver
  2022-05-17  6:29 [PATCH v1] raw/afu_mf: introduce AFU MF device driver Wei Huang
@ 2022-05-17  7:34 ` Wei Huang
  2022-05-19  2:43   ` [PATCH v3] " Wei Huang
  0 siblings, 1 reply; 57+ messages in thread
From: Wei Huang @ 2022-05-17  7:34 UTC (permalink / raw)
  To: dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, rosen.xu, tianfei.zhang, qi.z.zhang, Wei Huang

Add afu_mf driver to manage various AFU (Acceleration Function Unit)
in FPGA.

Signed-off-by: Wei Huang <wei.huang@intel.com>
Acked-by: Tianfei Zhang <tianfei.zhang@intel.com>
---
v2: fix typo
---
 drivers/raw/afu_mf/afu_mf_rawdev.c |  440 ++++++++
 drivers/raw/afu_mf/afu_mf_rawdev.h |   89 ++
 drivers/raw/afu_mf/he_hssi.c       |  369 +++++++
 drivers/raw/afu_mf/he_hssi.h       |  102 ++
 drivers/raw/afu_mf/he_lbk.c        |  429 ++++++++
 drivers/raw/afu_mf/he_lbk.h        |  121 +++
 drivers/raw/afu_mf/he_mem.c        |  181 ++++
 drivers/raw/afu_mf/he_mem.h        |   40 +
 drivers/raw/afu_mf/meson.build     |    8 +
 drivers/raw/afu_mf/n3000_afu.c     | 1997 ++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
 drivers/raw/afu_mf/rte_pmd_afu.h   |  134 +++
 drivers/raw/afu_mf/version.map     |    3 +
 drivers/raw/meson.build            |    1 +
 14 files changed, 4247 insertions(+)
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
 create mode 100644 drivers/raw/afu_mf/he_hssi.c
 create mode 100644 drivers/raw/afu_mf/he_hssi.h
 create mode 100644 drivers/raw/afu_mf/he_lbk.c
 create mode 100644 drivers/raw/afu_mf/he_lbk.h
 create mode 100644 drivers/raw/afu_mf/he_mem.c
 create mode 100644 drivers/raw/afu_mf/he_mem.h
 create mode 100644 drivers/raw/afu_mf/meson.build
 create mode 100644 drivers/raw/afu_mf/n3000_afu.c
 create mode 100644 drivers/raw/afu_mf/n3000_afu.h
 create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h
 create mode 100644 drivers/raw/afu_mf/version.map

diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c
new file mode 100644
index 0000000..f24c748
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
@@ -0,0 +1,440 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_rawdev_pmd.h>
+
+#include "rte_pmd_afu.h"
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+#include "he_lbk.h"
+#include "he_mem.h"
+#include "he_hssi.h"
+
+#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
+
+static const struct rte_afu_uuid afu_uuid_map[] = {  /* AFU UUIDs handled here; installed as afu_mf_pmd_drv.id_table */
+	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
+	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+	{ HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+	{ HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+	{ 0, 0 /* sentinel */ }
+};
+
+static struct afu_mf_drv *afu_table[] = {  /* per-AFU drivers searched by afu_mf_ops_get() */
+	&n3000_afu_drv,
+	&he_lbk_drv,
+	&he_mem_lbk_drv,
+	&he_mem_tg_drv,
+	&he_hssi_drv,
+	NULL  /* terminator: the lookup loop stops at the first NULL entry */
+};
+
+/**
+ * Try to take the device lock kept in the shared area, without blocking.
+ *
+ * @param dev
+ *   AFU device whose shared lock word is to be acquired.
+ * @return
+ *   0 when the lock was acquired, -ENODEV when dev or its shared area is
+ *   missing, -EBUSY when the lock is already held.
+ */
+static inline int afu_mf_trylock(struct afu_mf_rawdev *dev)
+{
+	int32_t expected = 0;
+
+	if (!dev || !dev->shared)
+		return -ENODEV;
+
+	/*
+	 * Use the strong compare-exchange (weak == 0): the weak form is
+	 * allowed to fail spuriously, which would report a free lock as
+	 * busy to a caller that never retries.
+	 */
+	if (!__atomic_compare_exchange_n(&dev->shared->lock, &expected, 1,
+				0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+		return -EBUSY;
+
+	return 0;
+}
+
+/**
+ * Release the device lock by storing 0 with release ordering.
+ * Does nothing when dev or its shared area is missing.
+ */
+static inline void afu_mf_unlock(struct afu_mf_rawdev *dev)
+{
+	if (dev != NULL && dev->shared != NULL)
+		__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE);
+}
+
+/**
+ * rawdev configure hook: hand the configuration blob to the AFU's config op.
+ *
+ * @return
+ *   -ENODEV when the raw device has no private data, the op's result when a
+ *   config op is installed, 0 otherwise.
+ */
+static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
+	rte_rawdev_obj_t config, size_t config_size)
+{
+	struct afu_mf_rawdev *afu;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	/* nothing to do when the AFU provides no config op */
+	if (afu->ops == NULL || afu->ops->config == NULL)
+		return 0;
+
+	return afu->ops->config(afu, config, config_size);
+}
+
+/**
+ * rawdev start hook: run the AFU's start op while holding the device lock.
+ *
+ * @return
+ *   -ENODEV when the raw device has no private data, the lock error when the
+ *   lock cannot be taken, otherwise the op's result (0 when no op exists).
+ */
+static int afu_mf_rawdev_start(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu;
+	int rc;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	rc = afu_mf_trylock(afu);
+	if (rc != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please start it later");
+		return rc;
+	}
+
+	/* rc is 0 here, so a missing start op reports success */
+	if (afu->ops != NULL && afu->ops->start != NULL)
+		rc = afu->ops->start(afu);
+
+	afu_mf_unlock(afu);
+
+	return rc;
+}
+
+/**
+ * rawdev stop hook: run the AFU's stop op while holding the device lock.
+ * Returns silently when there is no private data; warns and returns when the
+ * lock cannot be taken. The op's result is not reported to the caller.
+ */
+static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return;
+
+	if (afu_mf_trylock(afu) != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please stop it later");
+		return;
+	}
+
+	if (afu->ops != NULL && afu->ops->stop != NULL)
+		(void)afu->ops->stop(afu);
+
+	afu_mf_unlock(afu);
+}
+
+/**
+ * rawdev close hook: forward to the AFU's close op (no locking is done).
+ *
+ * @return
+ *   -ENODEV when the raw device has no private data, the op's result when a
+ *   close op is installed, 0 otherwise.
+ */
+static int afu_mf_rawdev_close(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	if (afu->ops == NULL || afu->ops->close == NULL)
+		return 0;
+
+	return afu->ops->close(afu);
+}
+
+/**
+ * rawdev reset hook: run the AFU's reset op while holding the device lock.
+ *
+ * @return
+ *   -ENODEV when the raw device has no private data, the lock error when the
+ *   lock cannot be taken, otherwise the op's result (0 when no op exists).
+ */
+static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu;
+	int rc;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	rc = afu_mf_trylock(afu);
+	if (rc != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please reset it later");
+		return rc;
+	}
+
+	/* rc is 0 here, so a missing reset op reports success */
+	if (afu->ops != NULL && afu->ops->reset != NULL)
+		rc = afu->ops->reset(afu);
+
+	afu_mf_unlock(afu);
+
+	return rc;
+}
+
+/**
+ * rawdev selftest hook: run the AFU's test op while holding the device lock.
+ *
+ * @param dev_id
+ *   Raw device identifier; must name a valid raw device.
+ * @return
+ *   -ENODEV for an invalid dev_id, -ENOENT when the device has no private
+ *   data, the lock error when the lock cannot be taken, otherwise the op's
+ *   result (0 when no op exists).
+ */
+static int afu_mf_rawdev_selftest(uint16_t dev_id)
+{
+	struct afu_mf_rawdev *afu;
+	int rc;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	if (!rte_rawdev_pmd_is_valid_dev(dev_id))
+		return -ENODEV;
+
+	afu = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
+	if (afu == NULL)
+		return -ENOENT;
+
+	rc = afu_mf_trylock(afu);
+	if (rc != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please test it later");
+		return rc;
+	}
+
+	/* rc is 0 here, so a missing test op reports success */
+	if (afu->ops != NULL && afu->ops->test != NULL)
+		rc = afu->ops->test(afu);
+
+	afu_mf_unlock(afu);
+
+	return rc;
+}
+
+/**
+ * rawdev dump hook: forward to the AFU's dump op (no locking is done).
+ *
+ * @return
+ *   -ENODEV when the raw device has no private data, the op's result when a
+ *   dump op is installed, 0 otherwise.
+ */
+static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f)
+{
+	struct afu_mf_rawdev *afu;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	if (afu->ops == NULL || afu->ops->dump == NULL)
+		return 0;
+
+	return afu->ops->dump(afu, f);
+}
+
+/*
+ * rawdev callbacks implemented by this PMD. Members that are not named here
+ * are zero-initialized, i.e. left NULL.
+ */
+static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
+	.dev_configure = afu_mf_rawdev_configure,
+	.dev_start = afu_mf_rawdev_start,
+	.dev_stop = afu_mf_rawdev_stop,
+	.dev_close = afu_mf_rawdev_close,
+	.dev_reset = afu_mf_rawdev_reset,
+	.dump = afu_mf_rawdev_dump,
+	.dev_selftest = afu_mf_rawdev_selftest,
+};
+
+/**
+ * Look up, or reserve on first use, the memzone holding a device's shared
+ * data and return a pointer to it.
+ *
+ * @param name
+ *   Memzone name; must fit in RTE_MEMZONE_NAMESIZE including the terminator.
+ * @param data
+ *   Output: receives the address of the shared area.
+ * @param socket_id
+ *   Socket passed to the memzone reservation.
+ * @return
+ *   0 on success, -EINVAL on a NULL argument, -ENAMETOOLONG when name does
+ *   not fit in a memzone name, -ENOMEM when the memzone cannot be obtained.
+ */
+static int
+afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
+	int socket_id)
+{
+	const struct rte_memzone *mz;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	struct afu_mf_shared *ptr = NULL;
+	int init_mz = 0;
+	int n = 0;
+
+	if (!name || !data)
+		return -EINVAL;
+
+	/* name format is afu_?|??:??.? which is unique */
+	n = snprintf(mz_name, sizeof(mz_name), "%s", name);
+	if (n < 0 || n >= (int)sizeof(mz_name)) {
+		/* a truncated name could alias another device's memzone */
+		AFU_MF_PMD_ERR("Memory zone name %s is too long!", name);
+		return -ENAMETOOLONG;
+	}
+
+	mz = rte_memzone_lookup(mz_name);
+	if (!mz) {
+		mz = rte_memzone_reserve(mz_name,
+				sizeof(struct afu_mf_shared),
+				socket_id, 0);
+		init_mz = 1;
+	}
+
+	if (!mz) {
+		AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
+			mz_name);
+		return -ENOMEM;
+	}
+
+	ptr = (struct afu_mf_shared *)mz->addr;
+
+	if (init_mz)  /* initialize memory zone on the first time */
+		ptr->lock = 0;
+
+	*data = ptr;
+
+	return 0;
+}
+
+/**
+ * Build the raw device name "afu_<bus device name>" into a caller buffer.
+ *
+ * @param afu_dev
+ *   AFU device whose device.name is used.
+ * @param name
+ *   Output buffer.
+ * @param size
+ *   Size of the output buffer in bytes.
+ * @return
+ *   0 on success, -EINVAL on a NULL argument or zero size, -ENAMETOOLONG
+ *   when the formatted name does not fit.
+ */
+static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev, char *name,
+	size_t size)
+{
+	int written;
+
+	if (afu_dev == NULL || name == NULL || size == 0)
+		return -EINVAL;
+
+	written = snprintf(name, size, "afu_%s", afu_dev->device.name);
+	if (written < (int)size)
+		return 0;
+
+	AFU_MF_PMD_ERR("Name of AFU device is too long!");
+	return -ENAMETOOLONG;
+}
+
+/**
+ * Find the operations of the AFU driver whose UUID equals afu_id.
+ *
+ * @param afu_id
+ *   UUID to look up in afu_table.
+ * @return
+ *   The ops pointer of the first matching entry, or NULL when afu_id is NULL
+ *   or no entry matches.
+ */
+static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id)
+{
+	struct afu_mf_drv **slot;
+
+	if (afu_id == NULL)
+		return NULL;
+
+	/* afu_table ends with a NULL entry */
+	for (slot = afu_table; *slot != NULL; slot++) {
+		if ((*slot)->uuid.uuid_low == afu_id->uuid_low &&
+			(*slot)->uuid.uuid_high == afu_id->uuid_high)
+			return (*slot)->ops;
+	}
+
+	return NULL;
+}
+
+/**
+ * Create and initialize the raw device that fronts an AFU.
+ *
+ * Allocates the raw device, binds the AFU-specific operations selected by
+ * UUID, runs the AFU's init op and attaches the shared lock area. On any
+ * failure after allocation the raw device is released again.
+ *
+ * @param afu_dev
+ *   AFU bus device being probed.
+ * @param socket_id
+ *   Socket used for the raw device and shared memzone allocations.
+ * @return
+ *   0 on success; -EINVAL for a NULL afu_dev, -ENOMEM when the raw device
+ *   cannot be allocated, -ENODEV when it has no private data, -ENOTSUP when
+ *   no driver matches the AFU UUID, or the error of the name, init or
+ *   shared-area step that failed.
+ */
+static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int socket_id)
+{
+	struct rte_rawdev *rawdev = NULL;
+	struct afu_mf_rawdev *dev = NULL;
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	int ret = 0;
+
+	if (!afu_dev)
+		return -EINVAL;
+
+	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (ret)
+		return ret;
+
+	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
+		name, socket_id);
+
+	/* Allocate device structure */
+	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct afu_mf_rawdev),
+				socket_id);
+	if (!rawdev) {
+		AFU_MF_PMD_ERR("Unable to allocate raw device");
+		return -ENOMEM;
+	}
+
+	rawdev->dev_ops = &afu_mf_rawdev_ops;
+	rawdev->device = &afu_dev->device;
+	rawdev->driver_name = afu_dev->driver->driver.name;
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (!dev) {
+		/* ret is still 0 here; without this probe would report success */
+		ret = -ENODEV;
+		goto cleanup;
+	}
+
+	dev->rawdev = rawdev;
+	dev->port = afu_dev->id.port;
+	dev->addr = afu_dev->mem_resource[0].addr;
+	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
+	if (dev->ops == NULL) {
+		AFU_MF_PMD_ERR("Unsupported AFU device");
+		/* same reason: do not fall through to cleanup with ret == 0 */
+		ret = -ENOTSUP;
+		goto cleanup;
+	}
+
+	if (dev->ops->init) {
+		ret = (*dev->ops->init)(dev);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed to init %s", name);
+			goto cleanup;
+		}
+	}
+
+	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
+	if (ret)
+		goto cleanup;
+
+	return ret;
+
+cleanup:
+	rte_rawdev_pmd_release(rawdev);
+	return ret;
+}
+
+/**
+ * Release the raw device that was created for an AFU.
+ *
+ * @param afu_dev
+ *   AFU bus device being removed.
+ * @return
+ *   0 once the raw device was found (a failed release is only logged);
+ *   -EINVAL for a NULL afu_dev or an unknown raw device name, or the error
+ *   from building the name.
+ */
+static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev)
+{
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	struct rte_rawdev *rawdev;
+	int rc;
+
+	if (afu_dev == NULL)
+		return -EINVAL;
+
+	rc = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (rc != 0)
+		return rc;
+
+	AFU_MF_PMD_INFO("Destroy raw device %s", name);
+
+	rawdev = rte_rawdev_pmd_get_named_dev(name);
+	if (rawdev == NULL) {
+		AFU_MF_PMD_ERR("Raw device %s not found", name);
+		return -EINVAL;
+	}
+
+	/* rte_rawdev_close is called by pmd_release */
+	if (rte_rawdev_pmd_release(rawdev) != 0)
+		AFU_MF_PMD_DEBUG("Device cleanup failed");
+
+	return 0;
+}
+
+/* AFU bus probe hook: create the raw device on the caller's socket. */
+static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev)
+{
+	int rc;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	rc = afu_mf_rawdev_create(afu_dev, rte_socket_id());
+	return rc;
+}
+
+/* AFU bus remove hook: destroy the raw device created at probe time. */
+static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev)
+{
+	int rc;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	rc = afu_mf_rawdev_destroy(afu_dev);
+	return rc;
+}
+
+static struct rte_afu_driver afu_mf_pmd_drv = {  /* AFU bus driver: UUID match table plus probe/remove hooks */
+	.id_table = afu_uuid_map,
+	.probe = afu_mf_rawdev_probe,
+	.remove = afu_mf_rawdev_remove
+};
+
+RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME, afu_mf_pmd_drv);  /* registers the driver under the name rawdev_afu_mf */
+RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);  /* defines afu_mf_pmd_logtype with NOTICE as the default level */
diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h b/drivers/raw/afu_mf/afu_mf_rawdev.h
new file mode 100644
index 0000000..5690010
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __AFU_MF_RAWDEV_H__
+#define __AFU_MF_RAWDEV_H__
+
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+extern int afu_mf_pmd_logtype;
+
+#define AFU_MF_PMD_LOG(level, fmt, args...) \
+	rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n", \
+		__func__, ##args)  /* prefixes the calling function's name and appends a newline */
+
+#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG, ">>")  /* function-entry marker at DEBUG level */
+
+#define AFU_MF_PMD_DEBUG(fmt, args...) \
+	AFU_MF_PMD_LOG(DEBUG, fmt, ## args)  /* level-specific wrappers around AFU_MF_PMD_LOG follow */
+#define AFU_MF_PMD_INFO(fmt, args...) \
+	AFU_MF_PMD_LOG(INFO, fmt, ## args)
+#define AFU_MF_PMD_ERR(fmt, args...) \
+	AFU_MF_PMD_LOG(ERR, fmt, ## args)
+#define AFU_MF_PMD_WARN(fmt, args...) \
+	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
+
+#define CACHE_LINE_SIZE(n)  ((n) << 6)  /* n * 64 */
+#define CACHE_LINE_ALIGNED(n)  ((n) >> 6)  /* n / 64 for unsigned n, remainder discarded */
+#define MHZ(f)  ((f) * 1000000)  /* f * 1000000; no widening cast is applied */
+
+#define dsm_poll_timeout(addr, val, cond, invl, timeout) \
+({                                                       \
+	uint64_t __wait = 0;                                 \
+	uint64_t __invl = (invl);                            \
+	uint64_t __timeout = (timeout);                      \
+	for (; __wait <= __timeout; __wait += __invl) {      \
+		(val) = *(addr);                                 \
+		if (cond)                                        \
+			break;                                       \
+		rte_delay_ms(__invl);                            \
+	}                                                    \
+	(cond) ? 0 : 1;                                      \
+})  /* loads *addr into val every invl ms until cond holds; yields 0 if cond holds at exit, else 1. cond is re-evaluated after the loop; invl == 0 never advances the wait counter */
+
+struct afu_mf_rawdev;
+
+struct afu_mf_ops {  /* per-AFU callbacks; the rawdev wrappers skip any member left NULL */
+	int (*init)(struct afu_mf_rawdev *dev);  /* run by afu_mf_rawdev_create() */
+	int (*config)(struct afu_mf_rawdev *dev, void *config,
+		size_t config_size);  /* run by the dev_configure wrapper */
+	int (*start)(struct afu_mf_rawdev *dev);  /* run with the device lock held */
+	int (*stop)(struct afu_mf_rawdev *dev);  /* run with the device lock held; result is discarded */
+	int (*test)(struct afu_mf_rawdev *dev);  /* run with the device lock held by the dev_selftest wrapper */
+	int (*close)(struct afu_mf_rawdev *dev);  /* run by the dev_close wrapper */
+	int (*reset)(struct afu_mf_rawdev *dev);  /* run with the device lock held */
+	int (*dump)(struct afu_mf_rawdev *dev, FILE *f);  /* run by the dump wrapper */
+};
+
+struct afu_mf_drv {  /* binds an AFU UUID to the operations that handle it */
+	struct rte_afu_uuid uuid;  /* compared against the probed device's UUID in afu_mf_ops_get() */
+	struct afu_mf_ops *ops;  /* returned by afu_mf_ops_get() on a match */
+};
+
+struct afu_mf_shared {  /* stored in a memzone named after the raw device */
+	int32_t lock;  /* 0 = free, 1 = held; see afu_mf_trylock()/afu_mf_unlock() */
+};
+
+struct afu_mf_rawdev {  /* stored as the raw device's dev_private */
+	struct rte_rawdev *rawdev;  /* point to parent raw device */
+	struct afu_mf_shared *shared;  /* shared data for multi-process; set by afu_mf_shared_alloc() */
+	struct afu_mf_ops *ops;  /* device operation functions; selected by UUID at create time */
+	int port;  /* index of port the AFU attached */
+	void *addr;  /* base address of AFU registers; taken from mem_resource[0] */
+	void *priv;  /* private driver data; e.g. HE-HSSI allocates it in init and frees it in close */
+};
+
+/* Return the driver data stored in rawdev->dev_private, or NULL for a NULL rawdev. */
+static inline struct afu_mf_rawdev *
+afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev)
+{
+	if (rawdev == NULL)
+		return NULL;
+
+	return (struct afu_mf_rawdev *)rawdev->dev_private;
+}
+
+#endif /* __AFU_MF_RAWDEV_H__ */
diff --git a/drivers/raw/afu_mf/he_hssi.c b/drivers/raw/afu_mf/he_hssi.c
new file mode 100644
index 0000000..68d8dba
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_hssi.h"
+
+/**
+ * Write a 32-bit value to an indirectly addressed HE-HSSI register.
+ *
+ * The value is placed in TRAFFIC_CTRL_DATA, a write command carrying the
+ * register address is posted to TRAFFIC_CTRL_CMD, and the function then waits
+ * for ack_trans to be raised and, after writing it back, to read as cleared.
+ *
+ * @param ctx
+ *   HE-HSSI context holding the AFU register base.
+ * @param addr
+ *   Indirect register address (stored in the 16-bit afu_cmd_addr field).
+ * @param value
+ *   Value to write.
+ * @return
+ *   0 on success, -EINVAL when ctx is NULL, -ETIME when either handshake
+ *   phase does not complete within MAILBOX_TIMEOUT_MS.
+ */
+static int he_hssi_indirect_write(struct he_hssi_ctx *ctx, uint32_t addr,
+	uint32_t value)
+{
+	struct traffic_ctrl_cmd cmd;
+	struct traffic_ctrl_data data;
+	uint32_t i = 0;
+
+	AFU_MF_PMD_DEBUG("Indirect write 0x%x, value 0x%08x", addr, value);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/*
+	 * Clear the whole 64-bit word first: only write_data is assigned
+	 * below, and the other half would otherwise carry stack garbage
+	 * into the register.
+	 */
+	data.csr = 0;
+	data.write_data = value;
+	rte_write64(data.csr, ctx->addr + TRAFFIC_CTRL_DATA);
+
+	cmd.csr = 0;
+	cmd.write_cmd = 1;
+	cmd.afu_cmd_addr = addr;
+	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+	/* phase 1: wait for the command to be acknowledged */
+	while (i < MAILBOX_TIMEOUT_MS) {
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIME;
+
+	/* phase 2: write ack_trans back until it reads as cleared */
+	i = 0;
+	cmd.csr = 0;
+	while (i < MAILBOX_TIMEOUT_MS) {
+		cmd.ack_trans = 1;
+		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (!cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIME;
+
+	return 0;
+}
+
+/**
+ * Read a 32-bit value from an indirectly addressed HE-HSSI register.
+ *
+ * A read command carrying the register address is posted to
+ * TRAFFIC_CTRL_CMD; once ack_trans is raised the data is fetched from
+ * TRAFFIC_CTRL_DATA, then ack_trans is written back until it reads as cleared.
+ *
+ * @param ctx
+ *   HE-HSSI context holding the AFU register base.
+ * @param addr
+ *   Indirect register address (stored in the 16-bit afu_cmd_addr field).
+ * @param value
+ *   Output: receives the value read.
+ * @return
+ *   0 on success, -EINVAL when ctx is NULL, -ETIME when either handshake
+ *   phase does not complete within MAILBOX_TIMEOUT_MS.
+ */
+static int he_hssi_indirect_read(struct he_hssi_ctx *ctx, uint32_t addr,
+	uint32_t *value)
+{
+	struct traffic_ctrl_cmd cmd;
+	struct traffic_ctrl_data data;
+	uint32_t waited;
+
+	if (ctx == NULL)
+		return -EINVAL;
+
+	/* post the read command */
+	cmd.csr = 0;
+	cmd.read_cmd = 1;
+	cmd.afu_cmd_addr = addr;
+	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+	/* phase 1: wait for the acknowledge, then fetch the data */
+	for (waited = 0; waited < MAILBOX_TIMEOUT_MS;
+			waited += MAILBOX_POLL_INTERVAL_MS) {
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (cmd.ack_trans) {
+			data.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_DATA);
+			*value = data.read_data;
+			break;
+		}
+	}
+	if (waited >= MAILBOX_TIMEOUT_MS)
+		return -ETIME;
+
+	/* phase 2: write ack_trans back until it reads as cleared */
+	cmd.csr = 0;
+	for (waited = 0; waited < MAILBOX_TIMEOUT_MS;
+			waited += MAILBOX_POLL_INTERVAL_MS) {
+		cmd.ack_trans = 1;
+		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (!cmd.ack_trans)
+			break;
+	}
+	if (waited >= MAILBOX_TIMEOUT_MS)
+		return -ETIME;
+
+	AFU_MF_PMD_DEBUG("Indirect read 0x%x, value 0x%08x", addr, *value);
+	return 0;
+}
+
+/**
+ * Print the traffic monitor counters, the AVST receive error flags and the
+ * loopback FIFO status to stdout. Stops at the first indirect read that
+ * fails, leaving the remaining items unprinted.
+ */
+static void he_hssi_report(struct he_hssi_ctx *ctx)
+{
+	/* flag bits of TM_AVST_RX_ERR in the order they are printed */
+	static const struct {
+		uint32_t mask;
+		const char *text;
+	} rx_err[] = {
+		{ OVERFLOW_ERR, " overflow" },
+		{ LENGTH_ERR, " length" },
+		{ OVERSIZE_ERR, " oversize" },
+		{ UNDERSIZE_ERR, " undersize" },
+		{ MAC_CRC_ERR, " crc" },
+		{ PHY_ERR, " phy" },
+	};
+	/* flag bits of LOOPBACK_FIFO_STATUS in the order they are printed */
+	static const struct {
+		uint32_t mask;
+		const char *text;
+	} fifo[] = {
+		{ ALMOST_EMPTY, " almost empty" },
+		{ ALMOST_FULL, " almost full" },
+	};
+	uint32_t reg = 0;
+	uint64_t bytes = 0;
+	size_t k;
+
+	if (he_hssi_indirect_read(ctx, TM_PKT_GOOD, &reg))
+		return;
+	printf("Number of good packets received: %u\n", reg);
+
+	if (he_hssi_indirect_read(ctx, TM_PKT_BAD, &reg))
+		return;
+	printf("Number of bad packets received: %u\n", reg);
+
+	/* the byte counter is split over two registers, high word first */
+	if (he_hssi_indirect_read(ctx, TM_BYTE_CNT1, &reg))
+		return;
+	bytes = reg;
+	if (he_hssi_indirect_read(ctx, TM_BYTE_CNT0, &reg))
+		return;
+	bytes = (bytes << 32) | reg;
+	printf("Number of bytes received: %"PRIu64"\n", bytes);
+
+	if (he_hssi_indirect_read(ctx, TM_AVST_RX_ERR, &reg))
+		return;
+	if (reg & ERR_VALID) {
+		printf("AVST rx error:");
+		for (k = 0; k < sizeof(rx_err) / sizeof(rx_err[0]); k++) {
+			if (reg & rx_err[k].mask)
+				printf("%s", rx_err[k].text);
+		}
+		printf("\n");
+	}
+
+	if (he_hssi_indirect_read(ctx, LOOPBACK_FIFO_STATUS, &reg))
+		return;
+	if (reg & (ALMOST_EMPTY | ALMOST_FULL)) {
+		printf("FIFO status:");
+		for (k = 0; k < sizeof(fifo) / sizeof(fifo[0]); k++) {
+			if (reg & fifo[k].mask)
+				printf("%s", fifo[k].text);
+		}
+		printf("\n");
+	}
+}
+
+/**
+ * Run the HE-HSSI test described by the stored configuration.
+ *
+ * Stops any running transfer and selects the configured channel. When
+ * he_loopback is >= 0 only the loopback enable is programmed and the
+ * function returns. Otherwise the traffic generator is programmed and
+ * started, TG_PKT_XFRD is polled once per second until it equals
+ * num_packets or cfg->timeout polls have elapsed, and the monitor
+ * counters are printed.
+ *
+ * @param dev
+ *   AFU device whose private data holds the HE-HSSI configuration.
+ * @return
+ *   0 on success (also when polling ends without reaching num_packets),
+ *   -EINVAL for a NULL dev, -ENOENT when the device has no private data, or
+ *   the error of the indirect access that failed.
+ */
+static int he_hssi_test(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
+	struct he_hssi_ctx *ctx = NULL;
+	struct traffic_ctrl_ch_sel sel;
+	uint32_t val = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_hssi_cfg;
+	ctx = &priv->he_hssi_ctx;
+
+	ret = he_hssi_indirect_write(ctx, TG_STOP_XFR, 0);
+	if (ret)
+		return ret;
+
+	/* clear the reserved bits before the whole 64-bit word is written */
+	sel.csr = 0;
+	sel.channel_sel = cfg->port;
+	rte_write64(sel.csr, ctx->addr + TRAFFIC_CTRL_CH_SEL);
+
+	if (cfg->he_loopback >= 0) {
+		val = cfg->he_loopback ? 1 : 0;
+		AFU_MF_PMD_INFO("%s HE loopback on port %u",
+			val ? "Enable" : "Disable", cfg->port);
+		return he_hssi_indirect_write(ctx, LOOPBACK_EN, val);
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_NUM_PKT, cfg->num_packets);
+	if (ret)
+		return ret;
+
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN, cfg->packet_length);
+	if (ret)
+		return ret;
+
+	/* MAC addresses: low 32 bits, then the remaining 16 bits */
+	val = cfg->src_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->src_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->dest_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->dest_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_length ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN_TYPE, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_payload ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_DATA_PATTERN, val);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < 3; i++) {
+		ret = he_hssi_indirect_write(ctx, TG_RANDOM_SEED(i),
+			cfg->rnd_seed[i]);
+		if (ret)
+			return ret;
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_START_XFR, 1);
+	if (ret)
+		return ret;
+
+	/*
+	 * Restart the counter: it still holds 3 from the seed loop, which
+	 * would silently cut three polls off the configured timeout.
+	 */
+	i = 0;
+	while (i++ < cfg->timeout) {
+		ret = he_hssi_indirect_read(ctx, TG_PKT_XFRD, &val);
+		if (ret)
+			break;
+		if (val == cfg->num_packets)
+			break;
+		sleep(1);
+	}
+
+	he_hssi_report(ctx);
+
+	return ret;
+}
+
+/**
+ * Allocate the HE-HSSI private data on first use and record the AFU
+ * register base in its context.
+ *
+ * @return
+ *   0 on success, -EINVAL for a NULL dev, -ENOMEM when allocation fails.
+ */
+static int he_hssi_init(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *hssi;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	hssi = (struct he_hssi_priv *)dev->priv;
+	if (hssi == NULL) {
+		/* first call: create zeroed private data and attach it */
+		hssi = rte_zmalloc(NULL, sizeof(struct he_hssi_priv), 0);
+		if (hssi == NULL)
+			return -ENOMEM;
+		dev->priv = hssi;
+	}
+
+	hssi->he_hssi_ctx.addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+/**
+ * Validate an HE-HSSI configuration and copy it into the private data.
+ *
+ * @param dev
+ *   AFU device that was initialized by he_hssi_init().
+ * @param config
+ *   Pointer to a struct rte_pmd_afu_he_hssi_cfg.
+ * @param config_size
+ *   Must equal sizeof(struct rte_pmd_afu_he_hssi_cfg).
+ * @return
+ *   0 on success, -EINVAL for a NULL/zero argument, a size mismatch or a
+ *   port >= NUM_HE_HSSI_PORTS, -ENOENT when the device has no private data.
+ */
+static int he_hssi_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct rte_pmd_afu_he_hssi_cfg *req;
+	struct he_hssi_priv *hssi;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	hssi = (struct he_hssi_priv *)dev->priv;
+	if (hssi == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_hssi_cfg))
+		return -EINVAL;
+
+	req = (struct rte_pmd_afu_he_hssi_cfg *)config;
+	if (req->port >= NUM_HE_HSSI_PORTS)
+		return -EINVAL;
+
+	rte_memcpy(&hssi->he_hssi_cfg, req, sizeof(hssi->he_hssi_cfg));
+
+	return 0;
+}
+
+/**
+ * Free the HE-HSSI private data and detach it from the device.
+ *
+ * @return
+ *   0 on success, -EINVAL for a NULL dev.
+ */
+static int he_hssi_close(struct afu_mf_rawdev *dev)
+{
+	void *old_priv;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	old_priv = dev->priv;
+	dev->priv = NULL;
+	rte_free(old_priv);
+
+	return 0;
+}
+
+/**
+ * Print the AFU register base address of the HE-HSSI context.
+ *
+ * @param dev
+ *   AFU device that was initialized by he_hssi_init().
+ * @param f
+ *   Output stream; stdout is used when NULL.
+ * @return
+ *   0 on success, -EINVAL for a NULL dev, -ENOENT when the device has no
+ *   private data.
+ */
+static int he_hssi_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_hssi_priv *hssi;
+	FILE *out;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	hssi = (struct he_hssi_priv *)dev->priv;
+	if (hssi == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+	fprintf(out, "addr:\t\t%p\n", (void *)hssi->he_hssi_ctx.addr);
+
+	return 0;
+}
+
+/*
+ * HE-HSSI operations. start, stop and reset are not implemented; members
+ * that are not named here are zero-initialized, i.e. left NULL.
+ */
+static struct afu_mf_ops he_hssi_ops = {
+	.init = he_hssi_init,
+	.config = he_hssi_config,
+	.test = he_hssi_test,
+	.close = he_hssi_close,
+	.dump = he_hssi_dump,
+};
+
+struct afu_mf_drv he_hssi_drv = {  /* HE-HSSI entry referenced from afu_table in afu_mf_rawdev.c */
+	.uuid = { HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+	.ops = &he_hssi_ops
+};
diff --git a/drivers/raw/afu_mf/he_hssi.h b/drivers/raw/afu_mf/he_hssi.h
new file mode 100644
index 0000000..f8b9623
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_HSSI_H_
+#define _HE_HSSI_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define HE_HSSI_UUID_L    0xbb370242ac130002  /* low half of the HE-HSSI AFU UUID */
+#define HE_HSSI_UUID_H    0x823c334c98bf11ea  /* high half of the HE-HSSI AFU UUID */
+#define NUM_HE_HSSI_PORTS 8  /* he_hssi_config() rejects port >= this value */
+
+extern struct afu_mf_drv he_hssi_drv;
+
+/* HE-HSSI registers definition */
+#define TRAFFIC_CTRL_CMD    0x30  /* TRAFFIC_CTRL_*: byte offsets added to he_hssi_ctx.addr */
+#define TRAFFIC_CTRL_DATA   0x38
+#define TRAFFIC_CTRL_CH_SEL 0x40
+#define AFU_SCRATCHPAD      0x48
+
+#define TG_NUM_PKT        0x3c00  /* TG_*: addresses passed to the indirect read/write helpers */
+#define TG_PKT_LEN_TYPE   0x3c01
+#define TG_DATA_PATTERN   0x3c02
+#define TG_START_XFR      0x3c03
+#define TG_STOP_XFR       0x3c04
+#define TG_SRC_MAC_L      0x3c05
+#define TG_SRC_MAC_H      0x3c06
+#define TG_DST_MAC_L      0x3c07
+#define TG_DST_MAC_H      0x3c08
+#define TG_PKT_XFRD       0x3c09  /* polled by he_hssi_test() until it equals num_packets */
+#define TG_RANDOM_SEED(n) (0x3c0a + (n))  /* he_hssi_test() uses n = 0..2 */
+#define TG_PKT_LEN        0x3c0d
+
+#define TM_NUM_PKT        0x3d00  /* TM_*: addresses passed to the indirect read helper */
+#define TM_PKT_GOOD       0x3d01
+#define TM_PKT_BAD        0x3d02
+#define TM_BYTE_CNT0      0x3d03  /* low 32 bits of the received byte count */
+#define TM_BYTE_CNT1      0x3d04  /* high 32 bits of the received byte count */
+#define TM_AVST_RX_ERR    0x3d07
+#define   OVERFLOW_ERR    (1 << 9)  /* flag bits of TM_AVST_RX_ERR, decoded in he_hssi_report() */
+#define   LENGTH_ERR      (1 << 8)
+#define   OVERSIZE_ERR    (1 << 7)
+#define   UNDERSIZE_ERR   (1 << 6)
+#define   MAC_CRC_ERR     (1 << 5)
+#define   PHY_ERR         (1 << 4)
+#define   ERR_VALID       (1 << 3)  /* the error flags are reported only when this bit is set */
+
+#define LOOPBACK_EN          0x3e00
+#define LOOPBACK_FIFO_STATUS 0x3e01
+#define   ALMOST_EMPTY    (1 << 1)  /* flag bits of LOOPBACK_FIFO_STATUS */
+#define   ALMOST_FULL     (1 << 0)
+
+#define MAILBOX_TIMEOUT_MS       100  /* limit for each phase of the indirect access handshake */
+#define MAILBOX_POLL_INTERVAL_MS 10  /* delay between handshake polls */
+
+struct traffic_ctrl_cmd {  /* value exchanged with the TRAFFIC_CTRL_CMD register */
+	union {
+		uint64_t csr;  /* whole register as one 64-bit value */
+		struct {
+			uint32_t read_cmd:1;  /* set to request an indirect read */
+			uint32_t write_cmd:1;  /* set to request an indirect write */
+			uint32_t ack_trans:1;  /* polled until set after a command, then written back until it reads 0 */
+			uint32_t rsvd1:29;
+			uint32_t afu_cmd_addr:16;  /* indirect register address */
+			uint32_t rsvd2:16;
+		};
+	};
+};
+
+struct traffic_ctrl_data {  /* value exchanged with the TRAFFIC_CTRL_DATA register */
+	union {
+		uint64_t csr;  /* whole register as one 64-bit value */
+		struct {
+			uint32_t read_data;  /* result picked up by he_hssi_indirect_read() */
+			uint32_t write_data;  /* value supplied by he_hssi_indirect_write() */
+		};
+	};
+};
+
+struct traffic_ctrl_ch_sel {  /* value written to the TRAFFIC_CTRL_CH_SEL register */
+	union {
+		uint64_t csr;  /* whole register as one 64-bit value */
+		struct {
+			uint32_t channel_sel:3;  /* loaded from rte_pmd_afu_he_hssi_cfg.port */
+			uint32_t rsvd1:29;
+			uint32_t rsvd2;
+		};
+	};
+};
+
+struct he_hssi_ctx {
+	uint8_t *addr;  /* AFU register base, copied from afu_mf_rawdev.addr in he_hssi_init() */
+};
+
+struct he_hssi_priv {  /* allocated by he_hssi_init(), freed by he_hssi_close() */
+	struct rte_pmd_afu_he_hssi_cfg he_hssi_cfg;  /* last configuration accepted by he_hssi_config() */
+	struct he_hssi_ctx he_hssi_ctx;
+};
+
+#endif /* _HE_HSSI_H_ */
diff --git a/drivers/raw/afu_mf/he_lbk.c b/drivers/raw/afu_mf/he_lbk.c
new file mode 100644
index 0000000..d47ddde
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.c
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_lbk.h"
+
+/*
+ * Build the CSR_CFG value from the stored test configuration and write it
+ * to the device.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL and -ENOENT when the
+ * device has no private data.
+ */
+static int he_lbk_afu_config(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_csr_cfg reg;
+	struct he_lbk_priv *lbk = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *user = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	lbk = (struct he_lbk_priv *)dev->priv;
+	if (lbk == NULL)
+		return -ENOENT;
+
+	user = &lbk->he_lbk_cfg;
+
+	/* start from an all-zero register image, then fill in the fields */
+	reg.csr = 0;
+	reg.cont = user->cont ? 1 : 0;
+	reg.mode = user->mode;
+	reg.trput_interleave = user->trput_interleave;
+	/* a multi_cl of 4 is encoded as 2, any other value as multi_cl - 1 */
+	reg.multicl_len = (user->multi_cl == 4) ? 2 : user->multi_cl - 1;
+
+	AFU_MF_PMD_DEBUG("cfg: 0x%08x", reg.csr);
+	rte_write32(reg.csr, lbk->he_lbk_ctx.addr + CSR_CFG);
+
+	return 0;
+}
+
+/*
+ * Print the result of one completed loopback run to stdout.
+ *
+ * @dev: raw device whose private data holds the HE-LBK context
+ * @cl:  number of cache lines used by the run, echoed in the report
+ *
+ * Counters come from CSR_STATUS0/CSR_STATUS1, the clock count from the DSM
+ * status block. When cfg->freq_mhz is 0 the frequency is taken from the low
+ * 16 bits of CSR_HE_INFO0, falling back to 350 MHz, and is stored back into
+ * the configuration. Does nothing when dev or its private data is NULL.
+ */
+static void he_lbk_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	struct he_lbk_dsm_status *stat = NULL;
+	struct he_lbk_status0 stat0;
+	struct he_lbk_status1 stat1;
+	uint64_t swtest_msg = 0;
+	uint64_t ticks = 0;
+	uint64_t info = 0;
+	double num, rd_bw, wr_bw;
+
+	if (!dev || !dev->priv)
+		return;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+
+	stat = ctx->status_ptr;
+
+	swtest_msg = rte_read64(ctx->addr + CSR_SWTEST_MSG);
+	stat0.csr = rte_read64(ctx->addr + CSR_STATUS0);
+	stat1.csr = rte_read64(ctx->addr + CSR_STATUS1);
+
+	/* continuous mode only subtracts the start overhead */
+	if (cfg->cont)
+		ticks = stat->num_clocks - stat->start_overhead;
+	else
+		ticks = stat->num_clocks -
+			(stat->start_overhead + stat->end_overhead);
+
+	if (cfg->freq_mhz == 0) {
+		info = rte_read64(ctx->addr + CSR_HE_INFO0);
+		AFU_MF_PMD_INFO("API version: %"PRIx64, info >> 16);
+		cfg->freq_mhz = info & 0xffff;
+		if (cfg->freq_mhz == 0) {
+			AFU_MF_PMD_INFO("Frequency of AFU clock is unknown."
+				" Assuming 350 MHz.");
+			cfg->freq_mhz = 350;
+		}
+	}
+
+	num = (double)stat0.num_reads;
+	rd_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+	num = (double)stat0.num_writes;
+	wr_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+
+	printf("Cachelines  Read_Count Write_Count Pend_Read Pend_Write "
+		"Clocks@%uMHz   Rd_Bandwidth   Wr_Bandwidth\n",
+		cfg->freq_mhz);
+	/* ticks is uint64_t: PRIu64 keeps the format correct on 32-bit too */
+	printf("%10u  %10u %10u %10u %10u  %12"PRIu64
+		"   %7.3f GB/s   %7.3f GB/s\n",
+		cl, stat0.num_reads, stat0.num_writes,
+		stat1.num_pend_reads, stat1.num_pend_writes,
+		ticks, rd_bw / 1e9, wr_bw / 1e9);
+	printf("Test Message: 0x%"PRIx64"\n", swtest_msg);
+}
+
+/*
+ * Run the loopback test once for every cache line count from cfg->begin to
+ * cfg->end, stepping by cfg->multi_cl.
+ *
+ * Each iteration clears the destination and DSM buffers, pulses CSR_CTL,
+ * programs CSR_NUM_LINES with cl - 1, starts the AFU and polls the DSM
+ * test_complete flag. In continuous mode the AFU is left running for
+ * cfg->timeout seconds and then forced to complete. A data mismatch in
+ * NLB_MODE_LPBK is only logged; it does not change the return value.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data, the error of he_lbk_afu_config(), or the non-zero
+ * result of dsm_poll_timeout() when completion is not observed in time.
+ */
+static int he_lbk_test(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	struct he_lbk_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+
+	/* write CSR_CTL as 0, wait 1 ms, then write it with the reset bit set */
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	rte_delay_us(1000);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	/* initialize DMA addresses */
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->src_iova),
+		ctx->addr + CSR_SRC_ADDR);
+
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->dest_iova),
+		ctx->addr + CSR_DST_ADDR);
+
+	/* the DSM base is programmed as two 32-bit halves */
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	rte_write32(CACHE_LINE_ALIGNED(ctx->dsm_iova),
+		ctx->addr + CSR_AFU_DSM_BASEL);
+	rte_write32(CACHE_LINE_ALIGNED(ctx->dsm_iova) >> 32,
+		ctx->addr + CSR_AFU_DSM_BASEH);
+
+	ret = he_lbk_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data: each 32-bit word holds its own index */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CACHE_LINE_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		memset(ctx->dest_ptr, 0, CACHE_LINE_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		rte_delay_us(1000);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		rte_write32(cl - 1, ctx->addr + CSR_NUM_LINES);
+
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			/* let the AFU run, then force completion and wait */
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+		} else {
+			/* wait for completion first, then set force_completion */
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		he_lbk_report(dev, cl);
+
+		/* give CSR_STATUS1 up to 100 x 1 ms to read back as zero */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		if (cfg->mode == NLB_MODE_LPBK) {
+			ptr = (uint32_t *)ctx->dest_ptr;
+			j = CACHE_LINE_SIZE(cl) >> 2;
+			for (i = 0; i < j; i++) {
+				if (*ptr++ != i) {
+					AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+					break;
+				}
+			}
+		}
+	}
+
+end:
+	/* propagate a poll timeout instead of always reporting success */
+	return ret;
+}
+
+/*
+ * Free the DSM, source and destination buffers of the HE-LBK context and
+ * reset the pointers that referred to them.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL and -ENOENT when the
+ * device has no private data.
+ */
+static int he_lbk_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_ctx *bufs = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	bufs = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+
+	/* status_ptr is cleared together with the DSM buffer */
+	rte_free(bufs->dsm_ptr);
+	bufs->dsm_ptr = NULL;
+	bufs->status_ptr = NULL;
+
+	rte_free(bufs->src_ptr);
+	bufs->src_ptr = NULL;
+
+	rte_free(bufs->dest_ptr);
+	bufs->dest_ptr = NULL;
+
+	return 0;
+}
+
+/*
+ * Set up the HE-LBK context: record the register base and allocate the
+ * DSM, source and destination buffers together with their IO addresses.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data, and -ENOMEM when an allocation or an IOVA lookup
+ * fails; in the -ENOMEM case everything allocated so far is released.
+ */
+static int he_lbk_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_ctx *c = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	c = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+	c->addr = (uint8_t *)dev->addr;
+
+	/* device status memory */
+	c->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+	if (c->dsm_ptr == NULL)
+		goto nomem;
+	c->dsm_iova = rte_malloc_virt2iova(c->dsm_ptr);
+	if (c->dsm_iova == RTE_BAD_IOVA)
+		goto nomem;
+
+	/* source buffer */
+	c->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (c->src_ptr == NULL)
+		goto nomem;
+	c->src_iova = rte_malloc_virt2iova(c->src_ptr);
+	if (c->src_iova == RTE_BAD_IOVA)
+		goto nomem;
+
+	/* destination buffer */
+	c->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (c->dest_ptr == NULL)
+		goto nomem;
+	c->dest_iova = rte_malloc_virt2iova(c->dest_ptr);
+	if (c->dest_iova == RTE_BAD_IOVA)
+		goto nomem;
+
+	/* the status block is read through the start of the DSM buffer */
+	c->status_ptr = (struct he_lbk_dsm_status *)c->dsm_ptr;
+	return 0;
+
+nomem:
+	he_lbk_ctx_release(dev);
+	return -ENOMEM;
+}
+
+/*
+ * Allocate the zeroed private data of the device when it has none yet,
+ * then initialize the HE-LBK context.
+ *
+ * Returns -EINVAL when dev is NULL, -ENOMEM when the private data cannot
+ * be allocated, otherwise the result of he_lbk_ctx_init().
+ */
+static int he_lbk_init(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->priv == NULL) {
+		void *mem = rte_zmalloc(NULL, sizeof(struct he_lbk_priv), 0);
+
+		if (mem == NULL)
+			return -ENOMEM;
+		dev->priv = mem;
+	}
+
+	return he_lbk_ctx_init(dev);
+}
+
+/*
+ * Validate a user supplied HE-LBK configuration and store a copy of it in
+ * the private data of the device.
+ *
+ * @config:      points to a struct rte_pmd_afu_he_lbk_cfg
+ * @config_size: must equal the size of that structure
+ *
+ * Returns 0 on success, -ENOENT when the device has no private data and
+ * -EINVAL for a NULL argument, a wrong size, a mode above NLB_MODE_TRPUT,
+ * a multi_cl other than 1, 2 or 4, or a begin/end pair outside
+ * MIN_CACHE_LINES..MAX_CACHE_LINES or with end below begin.
+ */
+static int he_lbk_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct rte_pmd_afu_he_lbk_cfg *req = NULL;
+	struct he_lbk_priv *lbk = NULL;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	lbk = (struct he_lbk_priv *)dev->priv;
+	if (lbk == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_lbk_cfg))
+		return -EINVAL;
+
+	req = (struct rte_pmd_afu_he_lbk_cfg *)config;
+
+	if (req->mode > NLB_MODE_TRPUT)
+		return -EINVAL;
+
+	switch (req->multi_cl) {
+	case 1:
+	case 2:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!((req->begin >= MIN_CACHE_LINES) &&
+		(req->begin <= MAX_CACHE_LINES)))
+		return -EINVAL;
+	if (!((req->end >= req->begin) && (req->end <= MAX_CACHE_LINES)))
+		return -EINVAL;
+
+	rte_memcpy(&lbk->he_lbk_cfg, req, sizeof(lbk->he_lbk_cfg));
+
+	return 0;
+}
+
+/*
+ * Release the HE-LBK buffers and free the private data of the device.
+ *
+ * Returns 0, or -EINVAL when dev is NULL. The result of the context
+ * release is deliberately not propagated.
+ */
+static int he_lbk_close(struct afu_mf_rawdev *dev)
+{
+	int rc = -EINVAL;
+
+	if (dev != NULL) {
+		(void)he_lbk_ctx_release(dev);
+		rte_free(dev->priv);
+		dev->priv = NULL;
+		rc = 0;
+	}
+
+	return rc;
+}
+
+/*
+ * Print the pointers and IO addresses held in the HE-LBK context.
+ *
+ * @f: output stream; stdout is used when it is NULL
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL and -ENOENT when the
+ * device has no private data.
+ */
+static int he_lbk_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (!f)
+		f = stdout;
+
+	ctx = &priv->he_lbk_ctx;
+
+	/*
+	 * The *_iova members are uint64_t, not pointers: print them as
+	 * 64-bit hex instead of casting an integer to void *, which is a
+	 * size-mismatched conversion where pointers are 32 bits wide.
+	 */
+	fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
+	fprintf(f, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
+	fprintf(f, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
+	fprintf(f, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
+	fprintf(f, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
+	fprintf(f, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
+	fprintf(f, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
+	fprintf(f, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
+
+	return 0;
+}
+
+/*
+ * Operation table shared by he_lbk_drv and he_mem_lbk_drv. The start, stop
+ * and reset callbacks are left unset.
+ */
+static struct afu_mf_ops he_lbk_ops = {
+	.init = he_lbk_init,
+	.config = he_lbk_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_lbk_test,
+	.close = he_lbk_close,
+	.dump = he_lbk_dump,
+	.reset = NULL
+};
+
+/* driver entry for the HE-LBK UUID; uses the common he_lbk_ops table */
+struct afu_mf_drv he_lbk_drv = {
+	.uuid = { HE_LBK_UUID_L, HE_LBK_UUID_H },
+	.ops = &he_lbk_ops
+};
+
+/* driver entry for the HE-MEM-LBK UUID; same operations as he_lbk_drv */
+struct afu_mf_drv he_mem_lbk_drv = {
+	.uuid = { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+	.ops = &he_lbk_ops
+};
diff --git a/drivers/raw/afu_mf/he_lbk.h b/drivers/raw/afu_mf/he_lbk.h
new file mode 100644
index 0000000..c2e8a29
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_LBK_H_
+#define _HE_LBK_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/* low and high 64-bit halves of the UUIDs matched by the two drivers below */
+#define HE_LBK_UUID_L      0xb94b12284c31e02b
+#define HE_LBK_UUID_H      0x56e203e9864f49a7
+#define HE_MEM_LBK_UUID_L  0xbb652a578330a8eb
+#define HE_MEM_LBK_UUID_H  0x8568ab4e6ba54616
+
+/* driver descriptors defined in he_lbk.c */
+extern struct afu_mf_drv he_lbk_drv;
+extern struct afu_mf_drv he_mem_lbk_drv;
+
+/* HE-LBK & HE-MEM-LBK registers definition (byte offsets from ctx->addr) */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+/* DSM base address, programmed as two 32-bit halves */
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+/* size in bytes of the DSM buffer and the timing of polls on it */
+#define DSM_SIZE           0x200000
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+/* size in bytes of the source/destination buffers and their alignment */
+#define NLB_BUF_SIZE  0x400000
+#define TEST_MEM_ALIGN  1024
+
+/*
+ * 32-bit image of CSR_CTL: three single-bit controls followed by 29
+ * reserved bits.
+ * NOTE(review): exact bit positions depend on the compiler's bit-field
+ * allocation order -- confirm against the register specification.
+ */
+struct he_lbk_csr_ctl {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t reset:1;
+			uint32_t start:1;
+			uint32_t force_completion:1;
+			uint32_t reserved:29;
+		};
+	};
+};
+
+/*
+ * 32-bit image of CSR_CFG. The bit-fields add up to 32 bits
+ * (1 + 1 + 3 + 2 + 13 + 3 + 5 + 1 + 1 + 2).
+ * NOTE(review): exact bit positions depend on the compiler's bit-field
+ * allocation order -- confirm against the register specification.
+ */
+struct he_lbk_csr_cfg {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t rsvd1:1;
+			uint32_t cont:1;
+			uint32_t mode:3;
+			uint32_t multicl_len:2;
+			uint32_t rsvd2:13;
+			uint32_t trput_interleave:3;
+			uint32_t test_cfg:5;
+			uint32_t interrupt_on_error:1;
+			uint32_t interrupt_testmode:1;
+			uint32_t rsvd3:2;
+		};
+	};
+};
+
+/*
+ * 64-bit image of CSR_STATUS0: num_writes overlays the first four bytes of
+ * csr and num_reads the next four.
+ */
+struct he_lbk_status0 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_writes;
+			uint32_t num_reads;
+		};
+	};
+};
+
+/*
+ * 64-bit image of CSR_STATUS1: num_pend_writes overlays the first four
+ * bytes of csr and num_pend_reads the next four.
+ */
+struct he_lbk_status1 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_pend_writes;
+			uint32_t num_pend_reads;
+		};
+	};
+};
+
+/*
+ * Layout of the status block read through he_lbk_ctx.status_ptr, which
+ * points at the start of the DSM buffer.
+ * NOTE(review): presumably filled in by the AFU through the DSM address
+ * programmed into CSR_AFU_DSM_BASEL/H -- confirm.
+ */
+struct he_lbk_dsm_status {
+	uint32_t test_complete;
+	uint32_t test_error;
+	uint64_t num_clocks;
+	uint32_t num_reads;
+	uint32_t num_writes;
+	uint32_t start_overhead;
+	uint32_t end_overhead;
+};
+
+/*
+ * Runtime context of the loopback AFU: register base plus the DSM, source
+ * and destination buffers, each kept as a virtual pointer and an IO address.
+ */
+struct he_lbk_ctx {
+	uint8_t *addr;
+	uint8_t *dsm_ptr;
+	uint64_t dsm_iova;
+	uint8_t *src_ptr;
+	uint64_t src_iova;
+	uint8_t *dest_ptr;
+	uint64_t dest_iova;
+	/* typed view of dsm_ptr */
+	struct he_lbk_dsm_status *status_ptr;
+};
+
+/* private data of the loopback AFU: test configuration and runtime context */
+struct he_lbk_priv {
+	struct rte_pmd_afu_he_lbk_cfg he_lbk_cfg;
+	struct he_lbk_ctx he_lbk_ctx;
+};
+
+#endif /* _HE_LBK_H_ */
diff --git a/drivers/raw/afu_mf/he_mem.c b/drivers/raw/afu_mf/he_mem.c
new file mode 100644
index 0000000..ccbb3a8
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_mem.h"
+
+/*
+ * Run the memory traffic generator test on the configured channels.
+ *
+ * The scratchpad register is first written and read back as a sanity
+ * check. The requested channel mask is then limited to the bits read from
+ * MEM_TG_CTRL, written back to start the generators, and MEM_TG_STAT is
+ * polled until every started channel is no longer active or
+ * MEM_TG_TIMEOUT_MS has elapsed. One result line per finished channel is
+ * printed to stdout.
+ *
+ * Returns 0 when all channels finished, -EINVAL when dev is NULL, -ENOENT
+ * when the device has no private data and -EIO when the scratchpad check
+ * fails.
+ * NOTE(review): on timeout the remaining channel mask is returned, i.e. a
+ * positive value converted from uint64_t to int rather than a negative
+ * errno -- confirm callers expect this.
+ */
+static int he_mem_tg_test(struct afu_mf_rawdev *dev)
+{
+	struct he_mem_tg_priv *priv = NULL;
+	struct rte_pmd_afu_he_mem_tg_cfg *cfg = NULL;
+	struct he_mem_tg_ctx *ctx = NULL;
+	uint64_t value = 0x12345678;
+	uint64_t cap = 0;
+	uint64_t channel_mask = 0;
+	int i, t = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_mem_tg_cfg;
+	ctx = &priv->he_mem_tg_ctx;
+
+	AFU_MF_PMD_DEBUG("Channel mask: 0x%x", cfg->channel_mask);
+
+	/* write a known pattern to the scratchpad and expect it back */
+	rte_write64(value, ctx->addr + MEM_TG_SCRATCHPAD);
+	cap = rte_read64(ctx->addr + MEM_TG_SCRATCHPAD);
+	AFU_MF_PMD_DEBUG("Scratchpad value: 0x%"PRIx64, cap);
+	if (cap != value) {
+		AFU_MF_PMD_ERR("Test scratchpad register failed");
+		return -EIO;
+	}
+
+	/* the value read from MEM_TG_CTRL is used as a capability mask */
+	cap = rte_read64(ctx->addr + MEM_TG_CTRL);
+	AFU_MF_PMD_DEBUG("Capability: 0x%"PRIx64, cap);
+
+	channel_mask = cfg->channel_mask & cap;
+	/* start traffic generators */
+	rte_write64(channel_mask, ctx->addr + MEM_TG_CTRL);
+
+	/* check test status */
+	while (t < MEM_TG_TIMEOUT_MS) {
+		value = rte_read64(ctx->addr + MEM_TG_STAT);
+		for (i = 0; i < NUM_MEM_TG_CHANNELS; i++) {
+			if (channel_mask & (1 << i)) {
+				/* still running: look at it again next poll */
+				if (TGACTIVE(value, i))
+					continue;
+				printf("TG channel %d test %s\n", i,
+					TGPASS(value, i) ? "pass" :
+					TGTIMEOUT(value, i) ? "timeout" :
+					TGFAIL(value, i) ? "fail" : "error");
+				/* finished channels are dropped from the mask */
+				channel_mask &= ~(1 << i);
+			}
+		}
+		if (!channel_mask)
+			break;
+		rte_delay_ms(MEM_TG_POLL_INTERVAL_MS);
+		t += MEM_TG_POLL_INTERVAL_MS;
+	}
+
+	if (channel_mask) {
+		AFU_MF_PMD_ERR("Timeout 0x%04lx", (unsigned long)value);
+		return channel_mask;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate the zeroed private data of the device when it has none yet and
+ * record the register base in the MEM-TG context.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL and -ENOMEM when the
+ * private data cannot be allocated.
+ */
+static int he_mem_tg_init(struct afu_mf_rawdev *dev)
+{
+	struct he_mem_tg_priv *tg = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->priv == NULL) {
+		dev->priv = rte_zmalloc(NULL, sizeof(struct he_mem_tg_priv), 0);
+		if (dev->priv == NULL)
+			return -ENOMEM;
+	}
+
+	tg = (struct he_mem_tg_priv *)dev->priv;
+	tg->he_mem_tg_ctx.addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+/*
+ * Store a copy of the user supplied MEM-TG configuration in the private
+ * data of the device.
+ *
+ * @config:      points to a struct rte_pmd_afu_he_mem_tg_cfg
+ * @config_size: must equal the size of that structure
+ *
+ * Returns 0 on success, -EINVAL for a NULL argument or a wrong size and
+ * -ENOENT when the device has no private data.
+ */
+static int he_mem_tg_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_mem_tg_priv *tg = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (config == NULL || config_size == 0)
+		return -EINVAL;
+
+	tg = (struct he_mem_tg_priv *)dev->priv;
+	if (tg == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_mem_tg_cfg))
+		return -EINVAL;
+
+	rte_memcpy(&tg->he_mem_tg_cfg, config, sizeof(tg->he_mem_tg_cfg));
+
+	return 0;
+}
+
+/*
+ * Free the private data of the device.
+ *
+ * Returns 0 on success and -EINVAL when dev is NULL.
+ */
+static int he_mem_tg_close(struct afu_mf_rawdev *dev)
+{
+	int rc = -EINVAL;
+
+	if (dev != NULL) {
+		rte_free(dev->priv);
+		dev->priv = NULL;
+		rc = 0;
+	}
+
+	return rc;
+}
+
+/*
+ * Print the register base held in the MEM-TG context.
+ *
+ * @f: output stream; stdout is used when it is NULL
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL and -ENOENT when the
+ * device has no private data.
+ */
+static int he_mem_tg_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_mem_tg_priv *tg = NULL;
+	FILE *out = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	tg = (struct he_mem_tg_priv *)dev->priv;
+	if (tg == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+
+	fprintf(out, "addr:\t\t%p\n", (void *)tg->he_mem_tg_ctx.addr);
+
+	return 0;
+}
+
+/* MEM-TG operation table; start, stop and reset callbacks are left unset */
+static struct afu_mf_ops he_mem_tg_ops = {
+	.init = he_mem_tg_init,
+	.config = he_mem_tg_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_mem_tg_test,
+	.close = he_mem_tg_close,
+	.dump = he_mem_tg_dump,
+	.reset = NULL
+};
+
+/* driver entry for the HE-MEM-TG UUID */
+struct afu_mf_drv he_mem_tg_drv = {
+	.uuid = { HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+	.ops = &he_mem_tg_ops
+};
diff --git a/drivers/raw/afu_mf/he_mem.h b/drivers/raw/afu_mf/he_mem.h
new file mode 100644
index 0000000..82404b6
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_MEM_H_
+#define _HE_MEM_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/* low and high 64-bit halves of the UUID matched by he_mem_tg_drv */
+#define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
+#define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
+
+/* number of channels scanned per poll, and poll timing in milliseconds */
+#define NUM_MEM_TG_CHANNELS      4
+#define MEM_TG_TIMEOUT_MS     5000
+#define MEM_TG_POLL_INTERVAL_MS 10
+
+/* driver descriptor defined in he_mem.c */
+extern struct afu_mf_drv he_mem_tg_drv;
+
+/*
+ * MEM-TG registers definition
+ *
+ * MEM_TG_STAT carries one 4-bit status nibble per channel; channel n lives
+ * at bit offset 4 * n. Within a nibble bit 0 is "active", bit 1 "timeout",
+ * bit 2 "fail" and bit 3 "pass". Every macro argument is parenthesised so
+ * that an expression such as (a | b) can be passed as the channel number.
+ */
+#define MEM_TG_SCRATCHPAD   0x28
+#define MEM_TG_CTRL         0x30
+#define   TGCONTROL(n)      (1 << (n))
+#define MEM_TG_STAT         0x38
+#define   TGSTATUS(v, n)    (((v) >> ((n) << 2)) & 0xf)
+#define   TGPASS(v, n)      (((v) >> (((n) << 2) + 3)) & 0x1)
+#define   TGFAIL(v, n)      (((v) >> (((n) << 2) + 2)) & 0x1)
+#define   TGTIMEOUT(v, n)   (((v) >> (((n) << 2) + 1)) & 0x1)
+#define   TGACTIVE(v, n)    (((v) >> ((n) << 2)) & 0x1)
+
+/* runtime context of the MEM-TG AFU: a byte pointer used as register base */
+struct he_mem_tg_ctx {
+	uint8_t *addr;
+};
+
+/* private data of the MEM-TG AFU: its configuration plus runtime context */
+struct he_mem_tg_priv {
+	struct rte_pmd_afu_he_mem_tg_cfg he_mem_tg_cfg;
+	struct he_mem_tg_ctx he_mem_tg_ctx;
+};
+
+#endif /* _HE_MEM_H_ */
diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
new file mode 100644
index 0000000..f304bc8
--- /dev/null
+++ b/drivers/raw/afu_mf/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2022 Intel Corporation
+
+# libraries and bus drivers this driver depends on
+deps += ['rawdev', 'bus_pci', 'bus_ifpga']
+# driver core plus one source file per supported AFU family
+sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c', 'he_mem.c',
+	'he_hssi.c')
+
+# header listed for installation
+headers = files('rte_pmd_afu.h')
diff --git a/drivers/raw/afu_mf/n3000_afu.c b/drivers/raw/afu_mf/n3000_afu.c
new file mode 100644
index 0000000..420e84a
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.c
@@ -0,0 +1,1997 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+
+/*
+ * Build the NLB CSR_CFG value from the stored configuration and write it
+ * to the device.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL and -ENOENT when the
+ * device has no private data.
+ */
+static int nlb_afu_config(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_csr_cfg v;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->nlb_cfg;
+
+	/* start from an all-zero register image */
+	v.csr = 0;
+
+	if (cfg->cont)
+		v.cont = 1;
+
+	/*
+	 * NOTE(review): wrthru_en is assigned again, unconditionally, further
+	 * down, which overrides the NLB_WRPUSH_I special case handled here --
+	 * confirm which of the two assignments is intended.
+	 */
+	if (cfg->cache_policy == NLB_WRPUSH_I)
+		v.wrpush_i = 1;
+	else
+		v.wrthru_en = cfg->cache_policy;
+
+	/* the mixed read hint is encoded as 3 */
+	if (cfg->cache_hint == NLB_RDLINE_MIXED)
+		v.rdsel = 3;
+	else
+		v.rdsel = cfg->cache_hint;
+
+	v.mode = cfg->mode;
+	v.chsel = cfg->read_vc;
+	v.wr_chsel = cfg->write_vc;
+	v.wrfence_chsel = cfg->wrfence_vc;
+	v.wrthru_en = cfg->cache_policy;
+	v.multicl_len = cfg->multi_cl - 1;
+
+	AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr);
+	rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
+
+	return 0;
+}
+
+/*
+ * Print the result of one completed NLB run to stdout.
+ *
+ * @dev: raw device whose private data holds the NLB context
+ * @cl:  number of cache lines used by the run, echoed in the report
+ *
+ * All counters are taken from the DSM status block. When cfg->freq_mhz is
+ * 0 it is set to 200 and stored back into the configuration. Does nothing
+ * when dev or its private data is NULL.
+ */
+static void nlb_afu_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_dsm_status *stat = NULL;
+	uint64_t ticks = 0;
+	double num, rd_bw, wr_bw;
+
+	if (!dev || !dev->priv)
+		return;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+
+	cfg = &priv->nlb_cfg;
+	stat = priv->nlb_ctx.status_ptr;
+
+	/* continuous mode only subtracts the start overhead */
+	if (cfg->cont)
+		ticks = stat->num_clocks - stat->start_overhead;
+	else
+		ticks = stat->num_clocks -
+			(stat->start_overhead + stat->end_overhead);
+
+	if (cfg->freq_mhz == 0)
+		cfg->freq_mhz = 200;
+
+	num = (double)stat->num_reads;
+	rd_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+	num = (double)stat->num_writes;
+	wr_bw = (num * CACHE_LINE_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+
+	printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
+		"Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
+	/* ticks is uint64_t: PRIu64 keeps the format correct on 32-bit too */
+	printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n", cl,
+		stat->num_reads, stat->num_writes, ticks,
+		rd_bw / 1e9, wr_bw / 1e9);
+}
+
+/*
+ * Run the NLB test once for every cache line count from cfg->begin to
+ * cfg->end, stepping by cfg->multi_cl.
+ *
+ * Each iteration clears the destination and DSM buffers, pulses CSR_CTL,
+ * programs CSR_NUM_LINES with cl, starts the AFU and polls the DSM
+ * test_complete flag. In continuous mode the AFU is left running for
+ * cfg->timeout seconds and then forced to complete. A data mismatch is
+ * only logged; it does not change the return value.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data, the error of nlb_afu_config(), or the non-zero
+ * result of dsm_poll_timeout() when completion is not observed in time.
+ */
+static int nlb_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct nlb_afu_ctx *ctx = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	ctx = &priv->nlb_ctx;
+	cfg = &priv->nlb_cfg;
+
+	/* initialize registers */
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	/* the DSM base is programmed with a single 64-bit write */
+	rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
+
+	/* write CSR_CTL as 0, then again with the reset bit set */
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->src_iova),
+		ctx->addr + CSR_SRC_ADDR);
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->dest_iova),
+		ctx->addr + CSR_DST_ADDR);
+
+	ret = nlb_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data: each 32-bit word holds its own index */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CACHE_LINE_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		memset(ctx->dest_ptr, 0, CACHE_LINE_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		rte_write32(cl, ctx->addr + CSR_NUM_LINES);
+
+		rte_delay_us(10);
+
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			/* let the AFU run, then force completion and wait */
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+		} else {
+			/* wait for completion first, then set force_completion */
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		nlb_afu_report(dev, cl);
+
+		/* give CSR_STATUS1 up to 100 x 1 ms to read back as zero */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		/* destination must hold the same index pattern as the source */
+		ptr = (uint32_t *)ctx->dest_ptr;
+		j = CACHE_LINE_SIZE(cl) >> 2;
+		for (i = 0; i < j; i++) {
+			if (*ptr++ != i) {
+				AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+				break;
+			}
+		}
+	}
+
+end:
+	return ret;
+}
+
+/*
+ * Free every DMA buffer plus the data and reference buffers of the context
+ * and reset the pointers to NULL. Does nothing when ctx is NULL.
+ */
+static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
+{
+	int slot;
+
+	if (ctx == NULL)
+		return;
+
+	for (slot = 0; slot < NUM_DMA_BUF; slot++) {
+		rte_free(ctx->dma_buf[slot]);
+		ctx->dma_buf[slot] = NULL;
+	}
+
+	rte_free(ctx->data_buf);
+	rte_free(ctx->ref_buf);
+	ctx->data_buf = NULL;
+	ctx->ref_buf = NULL;
+}
+
+/*
+ * Allocate the buffers used by the DMA test: NUM_DMA_BUF zeroed buffers of
+ * cfg->size bytes with their IO addresses, plus a data and a reference
+ * buffer of cfg->length bytes aligned to the system page size.
+ *
+ * Returns 0 on success, -EINVAL when ctx or cfg is NULL and -ENOMEM when
+ * an allocation or an IOVA lookup fails; in the -ENOMEM case all buffers
+ * of the context are freed again.
+ */
+static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
+	struct rte_pmd_afu_dma_cfg *cfg)
+{
+	size_t page_align = sysconf(_SC_PAGE_SIZE);
+	int slot;
+
+	if (ctx == NULL || cfg == NULL)
+		return -EINVAL;
+
+	for (slot = 0; slot < NUM_DMA_BUF; slot++) {
+		ctx->dma_buf[slot] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
+			TEST_MEM_ALIGN);
+		if (ctx->dma_buf[slot] == NULL)
+			goto nomem;
+
+		ctx->dma_iova[slot] = rte_malloc_virt2iova(ctx->dma_buf[slot]);
+		if (ctx->dma_iova[slot] == RTE_BAD_IOVA)
+			goto nomem;
+	}
+
+	ctx->data_buf = rte_malloc(NULL, cfg->length, page_align);
+	if (ctx->data_buf == NULL)
+		goto nomem;
+
+	ctx->ref_buf = rte_malloc(NULL, cfg->length, page_align);
+	if (ctx->ref_buf == NULL)
+		goto nomem;
+
+	return 0;
+
+nomem:
+	dma_afu_buf_free(ctx);
+	return -ENOMEM;
+}
+
+/*
+ * Fill the reference buffer with test data and copy it to the data buffer.
+ *
+ * With a non-zero ctx->pattern every byte is set to that pattern.
+ * Otherwise size / 4 words are filled from rand() after seeding with the
+ * fixed value 99, so the sequence is repeatable. Does nothing when ctx is
+ * NULL or size is 0.
+ */
+static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
+{
+	int *words = NULL;
+	size_t nwords = 0;
+	size_t k = 0;
+
+	if (ctx == NULL || size == 0)
+		return;
+
+	words = (int *)ctx->ref_buf;
+
+	if (!ctx->pattern) {
+		srand(99);
+		nwords = size / 4;
+		for (k = 0; k < nwords; k++)
+			words[k] = rand();
+	} else {
+		memset(words, ctx->pattern, size);
+	}
+
+	rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
+}
+
+/*
+ * Compare the first size bytes of the data buffer with the reference
+ * buffer and print the verdict to stdout.
+ *
+ * In verbose mode the mismatching bytes are listed; the scan stops as soon
+ * as the mismatch count reaches ERR_CHECK_LIMIT.
+ *
+ * Returns 0 when the buffers match, -1 when they differ and -EINVAL when
+ * ctx is NULL or size is 0.
+ */
+static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
+{
+	const uint8_t *expect = NULL;
+	const uint8_t *actual = NULL;
+	size_t pos = 0;
+	int bad = 0;
+
+	if (ctx == NULL || size == 0)
+		return -EINVAL;
+
+	expect = (const uint8_t *)ctx->ref_buf;
+	actual = (const uint8_t *)ctx->data_buf;
+
+	if (memcmp(expect, actual, size) == 0) {
+		printf("Transfer is verified\n");
+		return 0;
+	}
+
+	printf("Transfer is corrupted\n");
+	if (!ctx->verbose)
+		return -1;
+
+	for (pos = 0; pos < size; pos++) {
+		if (expect[pos] == actual[pos])
+			continue;
+		/* the mismatch that reaches the limit is counted, not printed */
+		if (++bad >= ERR_CHECK_LIMIT)
+			break;
+		printf("Mismatch at 0x%zx, "
+			"Expected %02x  Actual %02x\n",
+			pos, expect[pos], actual[pos]);
+	}
+
+	if (bad >= ERR_CHECK_LIMIT) {
+		printf("......\n");
+		printf("Found more than %d error bytes\n", bad);
+	} else {
+		printf("Found %d error bytes\n", bad);
+	}
+
+	return -1;
+}
+
+/*
+ * Copy bytes / 8 quad words from host memory to device registers, one
+ * rte_write64() per quad word in ascending address order. Nothing is
+ * written unless both dev_addr and bytes pass IS_ALIGNED_QWORD().
+ */
+static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+	uint64_t total;
+	uint64_t k;
+
+	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
+		!IS_ALIGNED_QWORD((uint64_t)bytes))
+		return;
+
+	total = bytes / sizeof(uint64_t);
+	for (k = 0; k < total; k++)
+		rte_write64(host_addr[k], &dev_addr[k]);
+}
+
+/*
+ * Copy bytes / 8 quad words from device registers to host memory, one
+ * rte_read64() per quad word in ascending address order. Nothing is read
+ * unless both dev_addr and bytes pass IS_ALIGNED_QWORD().
+ */
+static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+	uint64_t total;
+	uint64_t k;
+
+	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
+		!IS_ALIGNED_QWORD((uint64_t)bytes))
+		return;
+
+	total = bytes / sizeof(uint64_t);
+	for (k = 0; k < total; k++)
+		host_addr[k] = rte_read64(&dev_addr[k]);
+}
+
+/*
+ * Select the ASE window page that contains addr. The page is addr with the
+ * DMA_ASE_WINDOW_MASK bits cleared; the control register is written only
+ * when that page differs from the one cached in ctx->cur_ase_page. Does
+ * nothing when ctx is NULL.
+ */
+static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
+{
+	uint64_t page;
+
+	if (ctx == NULL)
+		return;
+
+	page = addr & ~DMA_ASE_WINDOW_MASK;
+	if (page == ctx->cur_ase_page)
+		return;
+
+	rte_write64(page, ctx->ase_ctrl_addr);
+	ctx->cur_ase_page = page;
+}
+
+/*
+ * Write fewer than QWORD_BYTES bytes to a device address through the ASE
+ * window using a read-modify-write of the enclosing quad word.
+ *
+ * @dev_addr:  device address of the first byte to write
+ * @host_addr: host virtual address of the source bytes
+ * @count:     number of bytes; must fit in the quad word containing dev_addr
+ *
+ * Returns 0 on success (including count == 0) and -EINVAL when ctx is
+ * NULL, count is QWORD_BYTES or more, or the bytes would cross into the
+ * next quad word.
+ */
+static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+	uint64_t host_addr, uint32_t count)
+{
+	uint64_t dev_aligned_addr = 0;
+	uint64_t shift = 0;
+	uint64_t val = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr,
+		dev_addr, count);
+
+	if (!ctx || (count >= QWORD_BYTES))
+		return -EINVAL;
+
+	if (!count)
+		return 0;
+
+	/*
+	 * The bytes are merged into the local quad word at offset shift, so
+	 * they must not reach past its end; otherwise the copy below would
+	 * write beyond val.
+	 */
+	shift = dev_addr % QWORD_BYTES;
+	if (shift + count > QWORD_BYTES)
+		return -EINVAL;
+
+	switch_ase_page(ctx, dev_addr);
+
+	dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
+	val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
+	rte_memcpy(((char *)(&val)) + shift, (void *)host_addr, count);
+
+	/* write back to device */
+	rte_write64(val, ctx->ase_data_addr + dev_aligned_addr);
+
+	return 0;
+}
+
+/*
+ * Copy DWORD/QWORD sized pieces from host memory to the device through the
+ * ASE window.
+ *
+ * @dst_ptr: in/out device address; must be DWORD aligned
+ * @src_ptr: in/out host virtual address
+ * @count:   in/out number of bytes still to copy
+ *
+ * An optional leading DWORD brings the destination to QWORD alignment,
+ * then whole quad words are copied window by window, then an optional
+ * trailing DWORD. On return the three in/out values describe what is left
+ * (fewer than DWORD_BYTES bytes on a complete run).
+ *
+ * Returns 0 on success and -EINVAL when ctx is NULL or the destination is
+ * not DWORD aligned.
+ */
+static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+	uint64_t *src_ptr, uint64_t *count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t align_bytes = *count;
+	uint64_t offset = 0;
+	uint64_t left_in_page = 0;
+	uint64_t size_to_copy = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		align_bytes);
+
+	if (!ctx || !IS_ALIGNED_DWORD(dst))
+		return -EINVAL;
+
+	/* nothing consumed yet, so the in/out values are already correct */
+	if (align_bytes < DWORD_BYTES)
+		return 0;
+
+	if (!IS_ALIGNED_QWORD(dst)) {
+		/* Write out a single DWORD to get QWORD aligned */
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+
+		rte_write32(*(uint32_t *)src, ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/*
+	 * No early return here: even when the leading DWORD used up all the
+	 * data, the updated src/dst/count must be handed back to the caller.
+	 */
+
+	/* Write out blocks of 64-bit values */
+	while (align_bytes >= QWORD_BYTES) {
+		/*
+		 * Room left in the current window, recomputed for every
+		 * chunk so that each new window starts with its full size.
+		 * Assumes DMA_ASE_WINDOW_MASK == DMA_ASE_WINDOW - 1.
+		 */
+		offset = dst & DMA_ASE_WINDOW_MASK;
+		left_in_page = DMA_ASE_WINDOW - offset;
+		size_to_copy =
+			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+		if (size_to_copy < QWORD_BYTES)
+			break;
+		switch_ase_page(ctx, dst);
+		blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
+			(uint64_t *)src, size_to_copy);
+		src += size_to_copy;
+		dst += size_to_copy;
+		align_bytes -= size_to_copy;
+	}
+
+	if (align_bytes >= DWORD_BYTES) {
+		/* Write out remaining DWORD */
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+		rte_write32(*(uint32_t *)src, ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	*src_ptr = src;
+	*dst_ptr = dst;
+	*count = align_bytes;
+
+	return 0;
+}
+
+/*
+ * Copy count bytes from host memory to the device through the ASE window.
+ *
+ * @dst_ptr: in/out device address, advanced past the copied data
+ * @src_ptr: in/out host virtual address, advanced likewise
+ * @count:   number of bytes to copy
+ *
+ * The copy is split into an unaligned head (byte-wise read-modify-write),
+ * an aligned middle handled by ase_write(), and an unaligned tail.
+ *
+ * Returns 0 on success or the error of the first helper that fails.
+ */
+static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+	uint64_t *src_ptr, uint64_t count)
+{
+	uint64_t dst = *dst_ptr;
+	uint64_t src = *src_ptr;
+	uint64_t count_left = count;
+	uint64_t unaligned_size = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		count);
+
+	/* aligns address to 8 byte using dst masking method */
+	if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) {
+		/* bytes up to the next QWORD boundary, capped by the total */
+		unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
+		if (unaligned_size > count_left)
+			unaligned_size = count_left;
+		ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
+		if (ret)
+			return ret;
+		count_left -= unaligned_size;
+		src += unaligned_size;
+		dst += unaligned_size;
+	}
+
+	/* Handles 8/4 byte MMIO transfer */
+	ret = ase_write(ctx, &dst, &src, &count_left);
+	if (ret)
+		return ret;
+
+	/* Left over unaligned bytes transferred using dst masking method */
+	unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
+	if (unaligned_size > count_left)
+		unaligned_size = count_left;
+
+	ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
+	if (ret)
+		return ret;
+
+	/* count_left is not read again after this update */
+	count_left -= unaligned_size;
+	*dst_ptr = dst + unaligned_size;
+	*src_ptr = src + unaligned_size;
+
+	return 0;
+}
+
+/*
+ * Read fewer than QWORD_BYTES bytes from a device address through the ASE
+ * window by fetching the enclosing quad word and copying out the wanted
+ * part.
+ *
+ * @dev_addr:  device address of the first byte to read
+ * @host_addr: host virtual address that receives the bytes
+ * @count:     number of bytes; must fit in the quad word containing dev_addr
+ *
+ * Returns 0 on success (including count == 0) and -EINVAL when ctx is
+ * NULL, count is QWORD_BYTES or more, or the bytes would cross into the
+ * next quad word.
+ */
+static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+	uint64_t host_addr, uint32_t count)
+{
+	uint64_t dev_aligned_addr = 0;
+	uint64_t shift = 0;
+	uint64_t val = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr,
+		dev_addr, count);
+
+	if (!ctx || (count >= QWORD_BYTES))
+		return -EINVAL;
+
+	if (!count)
+		return 0;
+
+	/*
+	 * The bytes are taken from the local quad word at offset shift, so
+	 * they must not reach past its end; otherwise the copy below would
+	 * read beyond val.
+	 */
+	shift = dev_addr % QWORD_BYTES;
+	if (shift + count > QWORD_BYTES)
+		return -EINVAL;
+
+	switch_ase_page(ctx, dev_addr);
+
+	dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
+	val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
+	rte_memcpy((void *)host_addr, ((char *)(&val)) + shift, count);
+
+	return 0;
+}
+
+/*
+ * Copy DWORD/QWORD sized pieces from the device to host memory through the
+ * ASE window.
+ *
+ * @src_ptr: in/out device address; must be DWORD aligned
+ * @dst_ptr: in/out host virtual address
+ * @count:   in/out number of bytes still to copy
+ *
+ * An optional leading DWORD brings the source to QWORD alignment, then
+ * whole quad words are copied window by window, then an optional trailing
+ * DWORD. On return the three in/out values describe what is left (fewer
+ * than DWORD_BYTES bytes on a complete run).
+ *
+ * Returns 0 on success and -EINVAL when ctx is NULL or the source is not
+ * DWORD aligned.
+ */
+static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+	uint64_t *dst_ptr, uint64_t *count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t align_bytes = *count;
+	uint64_t offset = 0;
+	uint64_t left_in_page = 0;
+	uint64_t size_to_copy = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")", dst, src,
+		align_bytes);
+
+	if (!ctx || !IS_ALIGNED_DWORD(src))
+		return -EINVAL;
+
+	/* nothing consumed yet, so the in/out values are already correct */
+	if (align_bytes < DWORD_BYTES)
+		return 0;
+
+	if (!IS_ALIGNED_QWORD(src)) {
+		/* Read a single DWORD to get QWORD aligned */
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		*(uint32_t *)dst = rte_read32(ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/*
+	 * No early return here: even when the leading DWORD used up all the
+	 * data, the updated src/dst/count must be handed back to the caller.
+	 */
+
+	/* Read blocks of 64-bit values */
+	while (align_bytes >= QWORD_BYTES) {
+		/*
+		 * Room left in the current window, recomputed for every
+		 * chunk so that each new window starts with its full size.
+		 * Assumes DMA_ASE_WINDOW_MASK == DMA_ASE_WINDOW - 1.
+		 */
+		offset = src & DMA_ASE_WINDOW_MASK;
+		left_in_page = DMA_ASE_WINDOW - offset;
+		size_to_copy =
+			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+		if (size_to_copy < QWORD_BYTES)
+			break;
+		switch_ase_page(ctx, src);
+		blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
+			(uint64_t *)dst, size_to_copy);
+		src += size_to_copy;
+		dst += size_to_copy;
+		align_bytes -= size_to_copy;
+	}
+
+	if (align_bytes >= DWORD_BYTES) {
+		/* Read remaining DWORD */
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		*(uint32_t *)dst = rte_read32(ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	*src_ptr = src;
+	*dst_ptr = dst;
+	*count = align_bytes;
+
+	return 0;
+}
+
+/*
+ * Copy count bytes from FPGA memory (*src_ptr) to host memory (*dst_ptr)
+ * through the ASE window, in three stages:
+ *   1. an unaligned head that brings the FPGA address up to a QWORD boundary,
+ *   2. the DWORD/QWORD aligned middle part (ase_read),
+ *   3. an unaligned tail with whatever is left.
+ *
+ * On success both addresses are advanced past the copied data and 0 is
+ * returned; the first error reported by a helper is returned otherwise and
+ * the addresses are left untouched.
+ */
+static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+	uint64_t *dst_ptr, uint64_t count)
+{
+	uint64_t fpga = *src_ptr;
+	uint64_t host = *dst_ptr;
+	uint64_t remaining = count;
+	uint64_t piece;
+	int ret;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", fpga, host,
+		count);
+
+	/* stage 1: head, only when the FPGA address is not aligned */
+	if (!(IS_ALIGNED_DWORD(fpga) || IS_ALIGNED_QWORD(fpga))) {
+		piece = QWORD_BYTES - (fpga % QWORD_BYTES);
+		if (remaining < piece)
+			piece = remaining;
+		ret = ase_read_unaligned(ctx, fpga, host, piece);
+		if (ret)
+			return ret;
+		fpga += piece;
+		host += piece;
+		remaining -= piece;
+	}
+
+	/* stage 2: 8/4 byte MMIO transfer */
+	ret = ase_read(ctx, &fpga, &host, &remaining);
+	if (ret)
+		return ret;
+
+	/* stage 3: tail, bounded by the end of the current QWORD */
+	piece = QWORD_BYTES - (fpga % QWORD_BYTES);
+	if (remaining < piece)
+		piece = remaining;
+	ret = ase_read_unaligned(ctx, fpga, host, piece);
+	if (ret)
+		return ret;
+
+	*src_ptr = fpga + piece;
+	*dst_ptr = host + piece;
+
+	return 0;
+}
+
+/*
+ * Acknowledge a DMA interrupt: write a status word with only the IRQ bit
+ * set to the status CSR (the IRQ bit is cleared by writing 1 to it).
+ * Does nothing when ctx is NULL.
+ */
+static void clear_interrupt(struct dma_afu_ctx *ctx)
+{
+	msgdma_status irq_ack;
+
+	if (ctx == NULL)
+		return;
+
+	irq_ack.csr = 0;
+	irq_ack.irq = 1;
+	rte_write32(irq_ack.csr, CSR_STATUS(ctx->csr_addr));
+}
+
+/*
+ * Wait for the DMA completion interrupt delivered through ctx->event_fd.
+ *
+ * Polls the event fd for up to DMA_TIMEOUT_MSEC and drains it with read().
+ * The interrupt is acknowledged in the status CSR on every path, including
+ * the error and timeout paths.
+ *
+ * Returns 0 when the event was received, -EINVAL for a NULL ctx or a
+ * negative event fd, -EFAULT when poll() fails, -ETIME on timeout and -EIO
+ * when the event counter could not be read.
+ */
+static int poll_interrupt(struct dma_afu_ctx *ctx)
+{
+	struct pollfd pfd = {0};
+	uint64_t count = 0;
+	ssize_t bytes_read = 0;
+	int poll_ret = 0;
+	int ret = 0;
+
+	if (!ctx || (ctx->event_fd < 0))
+		return -EINVAL;
+
+	pfd.fd = ctx->event_fd;
+	pfd.events = POLLIN;
+	poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
+	if (poll_ret < 0) {
+		AFU_MF_PMD_ERR("Error %s", strerror(errno));
+		ret = -EFAULT;
+	} else if (poll_ret == 0) {
+		AFU_MF_PMD_ERR("Timeout");
+		ret = -ETIME;
+	} else {
+		bytes_read = read(pfd.fd, &count, sizeof(count));
+		if (bytes_read > 0) {
+			if (ctx->verbose)
+				AFU_MF_PMD_DEBUG("Successful, ret %d, cnt %"PRIu64,
+					poll_ret, count);
+			ret = 0;
+		} else {
+			/*
+			 * errno is meaningful only when read() returned a
+			 * negative value; a return of 0 means no data.
+			 */
+			AFU_MF_PMD_ERR("Failed %s", bytes_read < 0 ?
+				strerror(errno) : "zero bytes read");
+			ret = -EIO;
+		}
+	}
+
+	clear_interrupt(ctx);
+	return ret;
+}
+
+/*
+ * Post one descriptor to the DMA engine.
+ *
+ * Spins on the status CSR until its desc_buf_full flag is clear (a debug
+ * line is logged each time the spin counter passes 100000000, after which
+ * the counter restarts), then writes the descriptor to ctx->desc_addr in
+ * 64-bit blocks. When ctx->verbose is set the descriptor fields are logged
+ * first. Does nothing when ctx is NULL.
+ */
+static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc)
+{
+	msgdma_status st;
+	uint64_t spins = 0;
+
+	if (ctx == NULL)
+		return;
+
+	if (ctx->verbose) {
+		AFU_MF_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
+			desc->rd_address_ext, desc->rd_address);
+		AFU_MF_PMD_DEBUG("descriptor.wr_address = 0x%x%08x",
+			desc->wr_address_ext, desc->wr_address);
+		AFU_MF_PMD_DEBUG("descriptor.len = %u", desc->len);
+		AFU_MF_PMD_DEBUG("descriptor.wr_burst_count = %u",
+			desc->wr_burst_count);
+		AFU_MF_PMD_DEBUG("descriptor.rd_burst_count = %u",
+			desc->rd_burst_count);
+		AFU_MF_PMD_DEBUG("descriptor.wr_stride %u", desc->wr_stride);
+		AFU_MF_PMD_DEBUG("descriptor.rd_stride %u", desc->rd_stride);
+	}
+
+	/* wait for room in the descriptor buffer */
+	for (;;) {
+		st.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
+		if (spins++ > 100000000) {
+			AFU_MF_PMD_DEBUG("DMA queue full retry");
+			spins = 0;
+		}
+		if (!st.desc_buf_full)
+			break;
+	}
+
+	blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
+		sizeof(*desc));
+}
+
+/*
+ * Build and post the descriptor(s) for one DMA transfer.
+ *
+ * @dst, @src     bus addresses of destination and source
+ * @count         number of bytes; dst, src and count must satisfy
+ *                IS_DMA_ALIGNED()
+ * @is_last_desc  when 0, early_done_en is set in the descriptor control word
+ * @type          transfer direction
+ * @intr_en       when non-zero, the descriptor that finishes the transfer
+ *                carries transfer_irq_en
+ *
+ * FPGA_TO_FPGA transfers are posted as a single descriptor with burst
+ * count 4. For the two host directions the transfer is split around
+ * CCIP_ALIGN_BYTES boundaries of the host address: an optional short head
+ * (burst 1) up to the next boundary, a body that is a multiple of
+ * CCIP_ALIGN_BYTES (burst 4) and an optional short tail (burst 1).
+ *
+ * The same descriptor buffer (ctx->desc_buf) is reused for all segments, so
+ * control bits changed for one segment stay in effect for the next one
+ * unless they are set again.
+ *
+ * Returns 0 on success, -EINVAL for a NULL ctx or misaligned arguments.
+ */
+static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	int count, int is_last_desc, fpga_dma_type type, int intr_en)
+{
+	msgdma_ext_desc *desc = NULL;
+	int alignment_offset = 0;
+	int segment_size = 0;
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* src, dst and count must be 64-byte aligned */
+	if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
+		!IS_DMA_ALIGNED(count))
+		return -EINVAL;
+	memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
+
+	/* these fields are fixed for all DMA transfers */
+	desc = ctx->desc_buf;
+	desc->seq_num = 0;
+	desc->wr_stride = 1;
+	desc->rd_stride = 1;
+	desc->control.go = 1;
+	if (intr_en)
+		desc->control.transfer_irq_en = 1;
+	else
+		desc->control.transfer_irq_en = 0;
+
+	if (!is_last_desc)
+		desc->control.early_done_en = 1;
+	else
+		desc->control.early_done_en = 0;
+
+	if (type == FPGA_TO_FPGA) {
+		desc->rd_address = src & DMA_MASK_32_BIT;
+		desc->wr_address = dst & DMA_MASK_32_BIT;
+		desc->len = count;
+		desc->wr_burst_count = 4;
+		desc->rd_burst_count = 4;
+		desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+		desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+		send_descriptor(ctx, desc);
+	} else {
+		/* check CCIP (host) address is aligned to 4CL (256B) */
+		alignment_offset = (type == HOST_TO_FPGA)
+			? (src % CCIP_ALIGN_BYTES) : (dst % CCIP_ALIGN_BYTES);
+		/* performing a short transfer to get aligned */
+		if (alignment_offset != 0) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->wr_burst_count = 1;
+			desc->rd_burst_count = 1;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			/* count isn't large enough to hit next 4CL boundary */
+			if ((CCIP_ALIGN_BYTES - alignment_offset) >= count) {
+				segment_size = count;
+				count = 0;
+			} else {
+				segment_size = CCIP_ALIGN_BYTES
+					- alignment_offset;
+				src += segment_size;
+				dst += segment_size;
+				count -= segment_size;
+				/* more segments follow: no interrupt yet */
+				desc->control.transfer_irq_en = 0;
+			}
+			/* post short transfer to align to a 4CL (256 byte) */
+			desc->len = segment_size;
+			send_descriptor(ctx, desc);
+		}
+		/* at this point we are 4CL (256 byte) aligned */
+		if (count >= CCIP_ALIGN_BYTES) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->wr_burst_count = 4;
+			desc->rd_burst_count = 4;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			/* buffer ends on 4CL boundary */
+			if ((count % CCIP_ALIGN_BYTES) == 0) {
+				segment_size = count;
+				count = 0;
+				/*
+				 * This descriptor finishes the transfer. The
+				 * head segment above may have switched the
+				 * interrupt off in the shared descriptor, so
+				 * restore the caller's request here.
+				 */
+				desc->control.transfer_irq_en =
+					intr_en ? 1 : 0;
+			} else {
+				segment_size = count
+					- (count % CCIP_ALIGN_BYTES);
+				src += segment_size;
+				dst += segment_size;
+				count -= segment_size;
+				/* the tail below carries the interrupt */
+				desc->control.transfer_irq_en = 0;
+			}
+			desc->len = segment_size;
+			send_descriptor(ctx, desc);
+		}
+		/* post short transfer to handle the remainder */
+		if (count > 0) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->len = count;
+			desc->wr_burst_count = 1;
+			desc->rd_burst_count = 1;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			if (intr_en)
+				desc->control.transfer_irq_en = 1;
+			send_descriptor(ctx, desc);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Queue the "magic" marker transfer: clear ctx->magic_buf, then post a
+ * 64-byte FPGA_TO_HOST DMA from DMA_WF_MAGIC_ROM into the magic buffer with
+ * the completion interrupt enabled. wait_magic() detects completion by
+ * watching the buffer contents.
+ *
+ * Returns the result of do_dma(), or -EINVAL when ctx or its magic buffer
+ * is NULL.
+ */
+static int issue_magic(struct dma_afu_ctx *ctx)
+{
+	if (!ctx || !ctx->magic_buf)
+		return -EINVAL;
+
+	*(ctx->magic_buf) = 0ULL;
+	return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
+		DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
+}
+
+/*
+ * Wait for the marker transfer queued by issue_magic() to land.
+ *
+ * Waits for the DMA interrupt first, then re-reads the magic buffer until it
+ * holds DMA_WF_MAGIC, giving up with an error log after about 1000 reads.
+ * The buffer is cleared before returning. The function returns void, so a
+ * timeout is only reported through the log (poll_interrupt() logs its own
+ * failures).
+ *
+ * The buffer is filled by the DMA engine, not by this thread, so it is read
+ * through a volatile pointer; otherwise the compiler may hoist the load out
+ * of the loop and compare a stale value.
+ */
+static void wait_magic(struct dma_afu_ctx *ctx)
+{
+	volatile uint64_t *magic = NULL;
+	int magic_timeout = 0;
+
+	if (!ctx || !ctx->magic_buf)
+		return;
+
+	magic = ctx->magic_buf;
+
+	poll_interrupt(ctx);
+	while (*magic != DMA_WF_MAGIC) {
+		if (magic_timeout++ > 1000) {
+			AFU_MF_PMD_ERR("DMA magic operation timeout");
+			break;
+		}
+	}
+	*magic = 0ULL;
+}
+
+/*
+ * Send chunk number `chunk` (each chunk is ctx->dma_buf_size bytes) of a
+ * host-to-FPGA transfer.
+ *
+ * The chunk is copied from host memory at src into bounce buffer
+ * (chunk % NUM_DMA_BUF) and a HOST_TO_FPGA DMA is posted from that buffer.
+ * An interrupt is requested on the last chunk and on every chunk whose index
+ * modulo HALF_DMA_BUF equals HALF_DMA_BUF - 1; before such a chunk is
+ * posted, the interrupt of the previous request, if any (*intr_issued), is
+ * waited for.
+ *
+ * Returns 0 on success, -EINVAL on NULL arguments, or the error from
+ * poll_interrupt()/do_dma().
+ */
+static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	uint64_t chunk, int is_last_chunk, int *intr_issued)
+{
+	uint64_t byte_off;
+	uint64_t slot;
+	int want_irq;
+	int ret;
+
+	if (ctx == NULL || intr_issued == NULL)
+		return -EINVAL;
+
+	byte_off = chunk * ctx->dma_buf_size;
+	src += byte_off;
+	dst += byte_off;
+
+	want_irq = is_last_chunk ||
+		((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1));
+
+	/* drain the previous interrupt before requesting a new one */
+	if (want_irq && *intr_issued) {
+		ret = poll_interrupt(ctx);
+		if (ret)
+			return ret;
+	}
+
+	slot = chunk % NUM_DMA_BUF;
+	rte_memcpy(ctx->dma_buf[slot], (void *)src, ctx->dma_buf_size);
+	ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[slot]),
+			ctx->dma_buf_size, 0, HOST_TO_FPGA, want_irq);
+	if (want_irq)
+		*intr_issued = 1;
+
+	return ret;
+}
+
+/*
+ * Copy count bytes from host memory at src to FPGA memory at dst.
+ *
+ * Steps, in order:
+ *   1. If dst is not DMA aligned, the bytes up to the next DMA_ALIGN_BYTES
+ *      boundary (or the whole transfer when it is shorter than
+ *      DMA_ALIGN_BYTES) go through ase_host_to_fpga().
+ *   2. Whole chunks of ctx->dma_buf_size bytes are sent with dma_tx_buf().
+ *   3. What is left is split into a DMA_ALIGN_BYTES multiple, sent by one
+ *      more DMA from bounce buffer 0, and a final remainder that goes
+ *      through ase_host_to_fpga() again.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL, or the first error
+ * reported by a helper.
+ */
+static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t aligned_addr = 0;
+	uint64_t align_bytes = 0;
+	uint64_t dma_chunks = 0;
+	uint64_t dma_tx_bytes = 0;
+	uint64_t offset = 0;
+	int issued_intr = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* step 1: bring dst up to a DMA boundary (dst and src are advanced) */
+	if (!IS_DMA_ALIGNED(dst)) {
+		if (count_left < DMA_ALIGN_BYTES)
+			return ase_host_to_fpga(ctx, &dst, &src, count_left);
+
+		aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
+			* DMA_ALIGN_BYTES;
+		align_bytes = aligned_addr - dst;
+		ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
+		if (ret)
+			return ret;
+		count_left = count_left - align_bytes;
+	}
+
+	if (count_left) {
+		/* step 2: full chunks; offset is the byte count they cover */
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = dma_tx_buf(ctx, dst, src, i,
+				i == (dma_chunks - 1), &issued_intr);
+			if (ret)
+				return ret;
+		}
+
+		/* wait for the interrupt requested by the last chunk(s) */
+		if (issued_intr) {
+			ret = poll_interrupt(ctx);
+			if (ret)
+				return ret;
+		}
+
+		/* step 3: less than one chunk is left */
+		if (count_left) {
+			i = count_left / DMA_ALIGN_BYTES;
+			if (i > 0) {
+				dma_tx_bytes = i * DMA_ALIGN_BYTES;
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+					dma_tx_bytes);
+				rte_memcpy(ctx->dma_buf[0],
+					(void *)(src + offset),
+					dma_tx_bytes);
+				ret = do_dma(ctx, dst + offset,
+					DMA_HOST_ADDR(ctx->dma_iova[0]),
+					dma_tx_bytes, 1, HOST_TO_FPGA, 1);
+				if (ret)
+					return ret;
+				ret = poll_interrupt(ctx);
+				if (ret)
+					return ret;
+			}
+
+			/* dma_tx_bytes is still 0 when no DMA was needed above */
+			count_left -= dma_tx_bytes;
+			if (count_left) {
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+					count_left);
+				dst += offset + dma_tx_bytes;
+				src += offset + dma_tx_bytes;
+				ret = ase_host_to_fpga(ctx, &dst, &src,
+					count_left);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Receive chunk number `chunk` (ctx->dma_buf_size bytes) of an FPGA-to-host
+ * transfer into bounce buffer (chunk % NUM_DMA_BUF).
+ *
+ * @rx_count   in/out: number of chunks already copied from the bounce
+ *             buffers to dst
+ * @wf_issued  in/out: non-zero while a marker transfer (issue_magic) is
+ *             outstanding
+ *
+ * After posting the DMA for this chunk:
+ *   - when exactly HALF_DMA_BUF chunks are pending (posted but not yet
+ *     copied out), a marker transfer is queued;
+ *   - when more than NUM_DMA_BUF - 1 chunks are pending, or this is the last
+ *     chunk, an outstanding marker is waited for, HALF_DMA_BUF bounce
+ *     buffers are copied to dst and *rx_count is advanced; then a new marker
+ *     is queued.
+ *
+ * Returns 0 on success, -EINVAL when ctx or wf_issued is NULL, or the error
+ * from do_dma()/issue_magic().
+ */
+static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
+{
+	uint64_t i = chunk % NUM_DMA_BUF;
+	uint64_t n = *rx_count;
+	uint64_t num_pending = 0;
+	int ret = 0;
+
+	if (!ctx || !wf_issued)
+		return -EINVAL;
+
+	/* post the DMA for this chunk; no interrupt is requested for it */
+	ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
+		src + chunk * ctx->dma_buf_size,
+		ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
+	if (ret)
+		return ret;
+
+	/* chunks posted so far that have not been copied to dst */
+	num_pending = chunk - n + 1;
+	if (num_pending == HALF_DMA_BUF) {
+		ret = issue_magic(ctx);
+		if (ret) {
+			AFU_MF_PMD_DEBUG("Magic issue failed");
+			return ret;
+		}
+		*wf_issued = 1;
+	}
+
+	if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
+		if (*wf_issued) {
+			wait_magic(ctx);
+			/* copy out the oldest HALF_DMA_BUF bounce buffers */
+			for (i = 0; i < HALF_DMA_BUF; i++) {
+				rte_memcpy((void *)(dst +
+						n * ctx->dma_buf_size),
+					ctx->dma_buf[n % NUM_DMA_BUF],
+					ctx->dma_buf_size);
+				n++;
+			}
+			*wf_issued = 0;
+			*rx_count = n;
+		}
+		/* queue a marker behind the chunks that are still pending */
+		ret = issue_magic(ctx);
+		if (ret) {
+			AFU_MF_PMD_DEBUG("Magic issue failed");
+			return ret;
+		}
+		*wf_issued = 1;
+	}
+
+	return ret;
+}
+
+/*
+ * Copy count bytes from FPGA memory at src to host memory at dst.
+ *
+ * Steps, in order:
+ *   1. If src is not DMA aligned, the bytes up to the next DMA_ALIGN_BYTES
+ *      boundary (or the whole transfer when it is shorter than
+ *      DMA_ALIGN_BYTES) go through ase_fpga_to_host().
+ *   2. Whole chunks of ctx->dma_buf_size bytes are received with
+ *      dma_rx_buf(); chunks still sitting in the bounce buffers afterwards
+ *      are copied to dst here.
+ *   3. What is left is split into a DMA_ALIGN_BYTES multiple, received by
+ *      one more DMA into bounce buffer 0, and a final remainder that goes
+ *      through ase_fpga_to_host() again.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL, or the first error
+ * reported by a helper.
+ */
+static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t aligned_addr = 0;
+	uint64_t align_bytes = 0;
+	uint64_t dma_chunks = 0;
+	uint64_t pending_buf = 0;
+	uint64_t dma_rx_bytes = 0;
+	uint64_t offset = 0;
+	int wf_issued = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* step 1: bring src up to a DMA boundary (src and dst are advanced) */
+	if (!IS_DMA_ALIGNED(src)) {
+		if (count_left < DMA_ALIGN_BYTES)
+			return ase_fpga_to_host(ctx, &src, &dst, count_left);
+
+		aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
+			 * DMA_ALIGN_BYTES;
+		align_bytes = aligned_addr - src;
+		ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
+		if (ret)
+			return ret;
+		count_left = count_left - align_bytes;
+	}
+
+	if (count_left) {
+		/* step 2: full chunks; offset is the byte count they cover */
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		/* pending_buf counts the chunks already copied out to dst */
+		for (i = 0; i < dma_chunks; i++) {
+			ret = dma_rx_buf(ctx, dst, src, i,
+				i == (dma_chunks - 1),
+				&pending_buf, &wf_issued);
+			if (ret)
+				return ret;
+		}
+
+		/* wait for the marker queued behind the last chunk(s) */
+		if (wf_issued)
+			wait_magic(ctx);
+
+		/* clear out final dma memcpy operations */
+		while (pending_buf < dma_chunks) {
+			/* constant size transfer; no length check required */
+			rte_memcpy((void *)(dst +
+					pending_buf * ctx->dma_buf_size),
+				ctx->dma_buf[pending_buf % NUM_DMA_BUF],
+				ctx->dma_buf_size);
+			pending_buf++;
+		}
+
+		/* step 3: less than one chunk is left */
+		if (count_left > 0) {
+			i = count_left / DMA_ALIGN_BYTES;
+			if (i > 0) {
+				dma_rx_bytes = i * DMA_ALIGN_BYTES;
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+					dma_rx_bytes);
+				ret = do_dma(ctx,
+					DMA_HOST_ADDR(ctx->dma_iova[0]),
+					src + offset,
+					dma_rx_bytes, 1, FPGA_TO_HOST, 0);
+				if (ret)
+					return ret;
+				/* marker transfer signals completion of the DMA */
+				ret = issue_magic(ctx);
+				if (ret)
+					return ret;
+				wait_magic(ctx);
+				rte_memcpy((void *)(dst + offset),
+					ctx->dma_buf[0], dma_rx_bytes);
+			}
+
+			/* dma_rx_bytes is still 0 when no DMA was needed above */
+			count_left -= dma_rx_bytes;
+			if (count_left) {
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+					count_left);
+				dst += offset + dma_rx_bytes;
+				src += offset + dma_rx_bytes;
+				ret = ase_fpga_to_host(ctx, &src, &dst,
+							count_left);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Copy count bytes inside FPGA memory, from src to dst.
+ *
+ * When dst, src and count are all DMA aligned, the copy is done by the DMA
+ * engine directly: chunks of ctx->dma_buf_size bytes, with a marker transfer
+ * (issue_magic/wait_magic) after every NUM_DMA_BUF chunks and after the last
+ * one, followed by one descriptor for the remainder.
+ *
+ * Otherwise the data is bounced through a temporary host buffer of
+ * ctx->dma_buf_size bytes, one chunk at a time, using dma_fpga_to_host() and
+ * dma_host_to_fpga(). This path rejects a destination that starts inside
+ * the source range (src < dst < src + count).
+ *
+ * Returns 0 on success, -EINVAL for a NULL ctx or an overlapping request,
+ * -ENOMEM when the bounce buffer cannot be allocated, or the first error
+ * reported by a helper.
+ */
+static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t dma_chunks = 0;
+	uint64_t offset = 0;
+	uint32_t tx_chunks = 0;
+	uint64_t *tmp_buf = NULL;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
+	    && IS_DMA_ALIGNED(count_left)) {
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
+				src + i * ctx->dma_buf_size,
+				ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
+			if (ret)
+				return ret;
+			/* synchronise every NUM_DMA_BUF chunks and at the end */
+			if ((((i + 1) % NUM_DMA_BUF) == 0) ||
+				(i == (dma_chunks - 1))) {
+				ret = issue_magic(ctx);
+				if (ret)
+					return ret;
+				wait_magic(ctx);
+			}
+		}
+
+		if (count_left > 0) {
+			AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA", count_left);
+			ret = do_dma(ctx, dst + offset, src + offset,
+				count_left, 1, FPGA_TO_FPGA, 0);
+			if (ret)
+				return ret;
+			ret = issue_magic(ctx);
+			if (ret)
+				return ret;
+			wait_magic(ctx);
+		}
+	} else {
+		if ((src < dst) && (src + count_left > dst)) {
+			AFU_MF_PMD_ERR("Overlapping: 0x%"PRIx64
+				" -> 0x%"PRIx64" (0x%"PRIx64")",
+				src, dst, count_left);
+			return -EINVAL;
+		}
+		tx_chunks = count_left / ctx->dma_buf_size;
+		offset = tx_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
+			" (%u...0x%"PRIx64")",
+			src, dst, tx_chunks, count_left);
+		tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
+			DMA_ALIGN_BYTES);
+		/* the helpers below write through tmp_buf, so it must exist */
+		if (!tmp_buf) {
+			AFU_MF_PMD_ERR("Failed to allocate bounce buffer");
+			return -ENOMEM;
+		}
+		for (i = 0; i < tx_chunks; i++) {
+			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
+				src + i * ctx->dma_buf_size,
+				ctx->dma_buf_size);
+			if (ret)
+				goto free_buf;
+			ret = dma_host_to_fpga(ctx,
+				dst + i * ctx->dma_buf_size,
+				(uint64_t)tmp_buf, ctx->dma_buf_size);
+			if (ret)
+				goto free_buf;
+		}
+
+		if (count_left > 0) {
+			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
+				src + offset, count_left);
+			if (ret)
+				goto free_buf;
+			ret = dma_host_to_fpga(ctx, dst + offset,
+				(uint64_t)tmp_buf, count_left);
+			if (ret)
+				goto free_buf;
+		}
+free_buf:
+		rte_free(tmp_buf);
+	}
+
+	return ret;
+}
+
+/*
+ * Run one blocking transfer of count bytes from src to dst in the direction
+ * selected by type, by dispatching to the matching helper.
+ *
+ * Returns the helper's result, or -EINVAL when ctx is NULL or type is not
+ * one of HOST_TO_FPGA, FPGA_TO_HOST and FPGA_TO_FPGA.
+ */
+static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
+	uint64_t src, size_t count, fpga_dma_type type)
+{
+	if (ctx == NULL)
+		return -EINVAL;
+
+	if (type == HOST_TO_FPGA)
+		return dma_host_to_fpga(ctx, dst, src, count);
+
+	if (type == FPGA_TO_HOST)
+		return dma_fpga_to_host(ctx, dst, src, count);
+
+	if (type == FPGA_TO_FPGA)
+		return dma_fpga_to_fpga(ctx, dst, src, count);
+
+	return -EINVAL;
+}
+
+/*
+ * Return the time elapsed from start to end, in seconds.
+ *
+ * The difference is computed in signed 64-bit nanoseconds: the operands are
+ * widened before the multiplication so the result does not depend on the
+ * width of long or time_t, and an end time earlier than start yields a
+ * negative value instead of wrapping to a huge positive one.
+ */
+static double getTime(struct timespec start, struct timespec end)
+{
+	int64_t sec = (int64_t)end.tv_sec - (int64_t)start.tv_sec;
+	int64_t nsec = (int64_t)end.tv_nsec - (int64_t)start.tv_nsec;
+	int64_t diff = sec * 1000000000LL + nsec;
+
+	return (double)diff / (double)1000000000LL;
+}
+
+#define SWEEP_ITERS 1
+/*
+ * Bandwidth sweep over one buffer window.
+ *
+ * The transfer covers length - (buf_offset + size_decrement) bytes, starting
+ * buf_offset bytes into ctx->data_buf on the host side and at ddr_offset on
+ * the FPGA side. The window is written to the FPGA and read back
+ * SWEEP_ITERS times each, the measured bandwidth of both directions is
+ * printed, and the host buffer is checked with dma_afu_buf_verify().
+ *
+ * Returns 0 on success, -EINVAL for missing buffers or a window that does
+ * not fit, or the error of the failing transfer/verification.
+ */
+static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
+	uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
+{
+	struct timespec t_begin, t_end;
+	uint64_t skipped = buf_offset + size_decrement;
+	uint64_t nbytes = 0;
+	uint64_t *host_ptr = NULL;
+	double mbps, elapsed = 0.0;
+	int pass = 0;
+	int ret = 0;
+
+	if (!ctx || !ctx->data_buf || !ctx->ref_buf) {
+		AFU_MF_PMD_ERR("Buffer for DMA test is not allocated");
+		return -EINVAL;
+	}
+
+	if (length < skipped) {
+		AFU_MF_PMD_ERR("Test length does not match unaligned parameter");
+		return -EINVAL;
+	}
+	nbytes = length - skipped;
+	if ((ddr_offset + nbytes) > ctx->mem_size) {
+		AFU_MF_PMD_ERR("Test is out of DDR memory space");
+		return -EINVAL;
+	}
+
+	host_ptr = (uint64_t *)((uint64_t)ctx->data_buf + buf_offset);
+
+	/* host -> FPGA */
+	printf("Sweep Host %p to FPGA 0x%"PRIx64
+		" with 0x%"PRIx64" bytes ...\n",
+		(void *)host_ptr, ddr_offset, nbytes);
+
+	pass = 0;
+	while (pass < SWEEP_ITERS) {
+		clock_gettime(CLOCK_MONOTONIC, &t_begin);
+		ret = dma_transfer_sync(ctx, ddr_offset, (uint64_t)host_ptr,
+			nbytes, HOST_TO_FPGA);
+		clock_gettime(CLOCK_MONOTONIC, &t_end);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed");
+			return ret;
+		}
+		elapsed += getTime(t_begin, t_end);
+		pass++;
+	}
+	mbps = (nbytes * SWEEP_ITERS) / (elapsed * 1000000);
+	printf("Measured bandwidth = %lf MB/s\n", mbps);
+
+	/* FPGA -> host, into a zeroed window so stale data cannot pass */
+	printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n",
+		ddr_offset, (void *)host_ptr, nbytes);
+
+	elapsed = 0.0;
+	memset((char *)host_ptr, 0, nbytes);
+	pass = 0;
+	while (pass < SWEEP_ITERS) {
+		clock_gettime(CLOCK_MONOTONIC, &t_begin);
+		ret = dma_transfer_sync(ctx, (uint64_t)host_ptr, ddr_offset,
+			nbytes, FPGA_TO_HOST);
+		clock_gettime(CLOCK_MONOTONIC, &t_end);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed");
+			return ret;
+		}
+		elapsed += getTime(t_begin, t_end);
+		pass++;
+	}
+	mbps = (nbytes * SWEEP_ITERS) / (elapsed * 1000000);
+	printf("Measured bandwidth = %lf MB/s\n", mbps);
+
+	printf("Verifying buffer ...\n");
+	return dma_afu_buf_verify(ctx, nbytes);
+}
+
+/*
+ * Run the DMA self test on the controller selected by the stored DMA
+ * configuration (priv->dma_cfg).
+ *
+ * Sequence: allocate and initialise the test buffers, enable the global
+ * interrupt, then
+ *   1. host -> FPGA, FPGA -> host, verify;
+ *   2. FPGA -> FPGA into a second region, read that region back, verify
+ *      (the remaining steps are skipped when no second region fits);
+ *   3. sweep with aligned address and size;
+ *   4. when cfg->unaligned is set, sweeps with a set of unaligned
+ *      offset/size combinations.
+ * The interrupt is disabled again and the buffers are freed on exit.
+ *
+ * Returns 0 on success, -EINVAL/-ENOENT for bad arguments, or the error of
+ * the first failing step.
+ */
+static int dma_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	struct rte_pmd_afu_dma_cfg *cfg = NULL;
+	msgdma_ctrl ctrl;
+	uint64_t offset = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->dma_cfg;
+	if (cfg->index >= NUM_N3000_DMA)
+		return -EINVAL;
+	ctx = &priv->dma_ctx[cfg->index];
+
+	ctx->pattern = (int)cfg->pattern;
+	ctx->verbose = (int)cfg->verbose;
+	ctx->dma_buf_size = cfg->size;
+
+	ret = dma_afu_buf_alloc(ctx, cfg);
+	if (ret)
+		goto free;
+
+	printf("Initialize test buffer\n");
+	dma_afu_buf_init(ctx, cfg->length);
+
+	/* enable interrupt */
+	ctrl.csr = 0;
+	ctrl.global_intr_en_mask = 1;
+	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+	printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf,
+		cfg->offset, cfg->length);
+	ret = dma_transfer_sync(ctx, cfg->offset, (uint64_t)ctx->data_buf,
+		cfg->length, HOST_TO_FPGA);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from host to FPGA");
+		goto end;
+	}
+	/* wipe the host copy so the read-back is what gets verified */
+	memset(ctx->data_buf, 0, cfg->length);
+
+	printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
+		ctx->data_buf, cfg->length);
+	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, cfg->offset,
+		cfg->length, FPGA_TO_HOST);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+		goto end;
+	}
+	ret = dma_afu_buf_verify(ctx, cfg->length);
+	if (ret)
+		goto end;
+
+	/*
+	 * Pick the destination of the FPGA-to-FPGA copy: right behind the
+	 * source region when it fits, else at address 0 when the source
+	 * starts above its own length. The sum is formed in 64 bits;
+	 * cfg->offset and cfg->length are handled as 32-bit values in this
+	 * function (see the %x formats), and offset + 2 * length evaluated
+	 * at that width can wrap and wrongly pass the bounds check.
+	 */
+	if (((uint64_t)cfg->offset + (uint64_t)cfg->length * 2) <=
+		ctx->mem_size)
+		offset = (uint64_t)cfg->offset + cfg->length;
+	else if (cfg->offset > cfg->length)
+		offset = 0;
+	else
+		goto end;
+
+	printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
+		cfg->offset, offset, cfg->length);
+	ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length,
+		FPGA_TO_FPGA);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to FPGA");
+		goto end;
+	}
+
+	printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
+		ctx->data_buf, cfg->length);
+	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, offset,
+		cfg->length, FPGA_TO_HOST);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+		goto end;
+	}
+	ret = dma_afu_buf_verify(ctx, cfg->length);
+	if (ret)
+		goto end;
+
+	printf("Sweep with aligned address and size\n");
+	ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
+	if (ret)
+		goto end;
+
+	if (cfg->unaligned) {
+		printf("Sweep with unaligned address and size\n");
+		/* {host buffer offset, bytes trimmed from the length} */
+		struct unaligned_set {
+			uint64_t addr_offset;
+			uint64_t size_dec;
+		} param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
+		for (i = 0; i < ARRAY_SIZE(param); i++) {
+			ret = sweep_test(ctx, cfg->length, cfg->offset,
+				param[i].addr_offset, param[i].size_dec);
+			if (ret)
+				break;
+		}
+	}
+
+end:
+	/* disable interrupt */
+	ctrl.global_intr_en_mask = 0;
+	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+free:
+	dma_afu_buf_free(ctx);
+	return ret;
+}
+
+/*
+ * Resolve the PCI device behind an AFU raw device: dev's rawdev device is
+ * converted to its AFU device, and that AFU device's own rawdev device is
+ * converted to the PCI device.
+ *
+ * Returns NULL when any link of that chain is missing.
+ */
+static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_mf_rawdev *dev)
+{
+	struct rte_afu_device *afu;
+
+	if (dev == NULL || dev->rawdev == NULL || dev->rawdev->device == NULL)
+		return NULL;
+
+	afu = RTE_DEV_TO_AFU(dev->rawdev->device);
+	if (afu->rawdev == NULL || afu->rawdev->device == NULL)
+		return NULL;
+
+	return RTE_DEV_TO_PCI(afu->rawdev->device);
+}
+
+/*
+ * Attach count event fds to consecutive MSI-X vectors, starting at
+ * vec_start, through the VFIO_DEVICE_SET_IRQS ioctl of the device's VFIO fd.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments (count must be in
+ * 1..MAX_MSIX_VEC), -ENODEV when the PCI device cannot be resolved, -ENOMEM
+ * when the request cannot be allocated, or the ioctl's return value.
+ */
+static int dma_afu_set_irqs(struct afu_mf_rawdev *dev, uint32_t vec_start,
+	uint32_t count, int *efds)
+{
+	struct rte_pci_device *pci;
+	struct vfio_irq_set *req;
+	size_t fds_len;
+	size_t req_len;
+	int vfio_fd;
+	int ret;
+
+	if (dev == NULL || efds == NULL || count == 0 || count > MAX_MSIX_VEC)
+		return -EINVAL;
+
+	pci = n3000_afu_get_pci_dev(dev);
+	if (pci == NULL)
+		return -ENODEV;
+	vfio_fd = rte_intr_dev_fd_get(pci->intr_handle);
+
+	/* request header followed by one fd per vector */
+	fds_len = sizeof(*efds) * count;
+	req_len = sizeof(*req) + fds_len;
+	req = rte_zmalloc(NULL, req_len, 0);
+	if (req == NULL)
+		return -ENOMEM;
+
+	req->argsz = (uint32_t)req_len;
+	req->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+		VFIO_IRQ_SET_ACTION_TRIGGER;
+	req->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	req->start = vec_start;
+	req->count = count;
+	rte_memcpy(&req->data, efds, fds_len);
+
+	ret = ioctl(vfio_fd, VFIO_DEVICE_SET_IRQS, req);
+	if (ret)
+		AFU_MF_PMD_ERR("Error enabling MSI-X interrupts\n");
+
+	rte_free(req);
+	return ret;
+}
+
+/*
+ * Locate the register block of the FIU port dev->port.
+ *
+ * The port attribute register is read from BAR 0; it tells whether the port
+ * is implemented and in which BAR, at which offset, its registers live.
+ *
+ * Returns the mapped address of the port registers, or NULL when the PCI
+ * device cannot be resolved, the port is not implemented, or the reported
+ * BAR index is >= PCI_MAX_RESOURCE.
+ */
+static void *n3000_afu_get_port_addr(struct afu_mf_rawdev *dev)
+{
+	struct rte_pci_device *pci;
+	uint8_t *bar0;
+	uint64_t attr;
+	uint32_t bar_idx;
+
+	pci = n3000_afu_get_pci_dev(dev);
+	if (pci == NULL)
+		return NULL;
+
+	bar0 = (uint8_t *)pci->mem_resource[0].addr;
+	attr = rte_read64(bar0 + PORT_ATTR_REG(dev->port));
+	if (!PORT_IMPLEMENTED(attr)) {
+		AFU_MF_PMD_INFO("FIU port %d is not implemented", dev->port);
+		return NULL;
+	}
+
+	bar_idx = PORT_BAR(attr);
+	if (bar_idx >= PCI_MAX_RESOURCE) {
+		AFU_MF_PMD_ERR("BAR index %u is out of limit", bar_idx);
+		return NULL;
+	}
+
+	return (uint8_t *)pci->mem_resource[bar_idx].addr + PORT_OFFSET(attr);
+}
+
+/*
+ * Find the interrupt capability of the port by walking its feature header
+ * list, starting at the port register block.
+ *
+ * The walk follows each header's next-offset field and ends at the header
+ * flagged end-of-list, or when the next offset is 0 or has its low 16 bits
+ * all set. For the private feature with id PORT_FEATURE_UINT_ID the
+ * capability register is read, and the first vector and vector count are
+ * stored through vec_start / vec_count (either may be NULL).
+ *
+ * Returns 0 when the feature was found, -ENOENT otherwise (including when
+ * the port address cannot be determined).
+ */
+static int n3000_afu_get_irq_capability(struct afu_mf_rawdev *dev,
+	uint32_t *vec_start, uint32_t *vec_count)
+{
+	uint8_t *feature;
+	uint64_t dfh;
+	uint64_t cap;
+	uint64_t step;
+
+	feature = (uint8_t *)n3000_afu_get_port_addr(dev);
+	if (feature == NULL)
+		return -ENOENT;
+
+	for (;;) {
+		dfh = rte_read64(feature);
+		if ((DFH_TYPE(dfh) == DFH_TYPE_PRIVATE) &&
+			(DFH_FEATURE_ID(dfh) == PORT_FEATURE_UINT_ID)) {
+			cap = rte_read64(feature + PORT_UINT_CAP_REG);
+			if (vec_start != NULL)
+				*vec_start = PORT_VEC_START(cap);
+			if (vec_count != NULL)
+				*vec_count = PORT_VEC_COUNT(cap);
+			return 0;
+		}
+
+		step = DFH_NEXT_OFFSET(dfh);
+		if (((step & 0xffff) == 0xffff) || (step == 0))
+			break;
+		if (DFH_EOL(dfh))
+			break;
+		feature += step;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Free the buffers owned by the NLB context (DSM, source and destination)
+ * and reset the pointers, including status_ptr, which points into the DSM
+ * buffer. Safe to call on a partially initialised context.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data.
+ */
+static int nlb_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv;
+	struct nlb_afu_ctx *nlb;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	nlb = &priv->nlb_ctx;
+
+	rte_free(nlb->dsm_ptr);
+	rte_free(nlb->src_ptr);
+	rte_free(nlb->dest_ptr);
+
+	nlb->dsm_ptr = NULL;
+	nlb->status_ptr = NULL;
+	nlb->src_ptr = NULL;
+	nlb->dest_ptr = NULL;
+
+	return 0;
+}
+
+/*
+ * Set up the NLB context for the AFU found at addr: record the register
+ * base, allocate the zeroed DSM, source and destination buffers, and look up
+ * the IOVA of each one. status_ptr is pointed at offset DSM_STATUS inside
+ * the DSM buffer.
+ *
+ * Returns 0 on success, -EINVAL on NULL arguments, -ENOENT when the device
+ * has no private data, and -ENOMEM when an allocation or IOVA lookup fails
+ * (everything allocated so far is released again).
+ */
+static int nlb_afu_ctx_init(struct afu_mf_rawdev *dev, uint8_t *addr)
+{
+	struct n3000_afu_priv *priv;
+	struct nlb_afu_ctx *nlb;
+
+	if (dev == NULL || addr == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	nlb = &priv->nlb_ctx;
+	nlb->addr = addr;
+
+	/* DSM buffer */
+	nlb->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+	if (nlb->dsm_ptr == NULL)
+		goto no_mem;
+	nlb->dsm_iova = rte_malloc_virt2iova(nlb->dsm_ptr);
+	if (nlb->dsm_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	/* source buffer */
+	nlb->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (nlb->src_ptr == NULL)
+		goto no_mem;
+	nlb->src_iova = rte_malloc_virt2iova(nlb->src_ptr);
+	if (nlb->src_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	/* destination buffer */
+	nlb->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (nlb->dest_ptr == NULL)
+		goto no_mem;
+	nlb->dest_iova = rte_malloc_virt2iova(nlb->dest_ptr);
+	if (nlb->dest_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	nlb->status_ptr = (struct nlb_dsm_status *)(nlb->dsm_ptr + DSM_STATUS);
+	return 0;
+
+no_mem:
+	nlb_afu_ctx_release(dev);
+	return -ENOMEM;
+}
+
+/*
+ * Release the resources of all DMA contexts of the device.
+ *
+ * dma_afu_ctx_init() allocates a descriptor buffer and a magic buffer for
+ * every controller index, so all NUM_N3000_DMA contexts are released here,
+ * not only the first one. The event fd is created once and shared by the
+ * contexts; it is closed once and then invalidated in every context, which
+ * makes a second call harmless and lets poll_interrupt() reject a released
+ * context.
+ *
+ * NOTE(review): the private data is allocated zeroed (rte_zmalloc in
+ * n3000_afu_init), so an event_fd of 0 is taken to mean "never set" and is
+ * not closed, to avoid closing descriptor 0 of the process. Assumes
+ * priv->dma_ctx has NUM_N3000_DMA entries, as the index checks elsewhere in
+ * this file imply.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data.
+ */
+static int dma_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	int event_fd = 0;
+	int i = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	/* context 0 is the one that creates the shared event fd */
+	event_fd = priv->dma_ctx[0].event_fd;
+
+	for (i = 0; i < NUM_N3000_DMA; i++) {
+		ctx = &priv->dma_ctx[i];
+
+		rte_free(ctx->desc_buf);
+		ctx->desc_buf = NULL;
+
+		rte_free(ctx->magic_buf);
+		ctx->magic_buf = NULL;
+
+		ctx->event_fd = -1;
+	}
+
+	if (event_fd > 0)
+		close(event_fd);
+
+	return 0;
+}
+
+/*
+ * Set up DMA context `index` for the controller found at addr.
+ *
+ * Register addresses are derived from addr, the size of the attached memory
+ * is taken from a per-index table, and the descriptor and magic buffers are
+ * allocated. For index 0 the interrupt vector is looked up, an event fd is
+ * created and bound to it; a failure to bind is only logged. The event fd
+ * is kept in a function-static array, so contexts initialised later get the
+ * fd created for index 0.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments, -ENOENT when the device
+ * has no private data, the error of the interrupt capability lookup,
+ * -EBADF when the event fd cannot be created, and -ENOMEM when a buffer
+ * allocation or IOVA lookup fails (dma_afu_ctx_release() is called then).
+ */
+static int dma_afu_ctx_init(struct afu_mf_rawdev *dev, int index, uint8_t *addr)
+{
+	uint64_t mem_sz[] = {0x100000000, 0x100000000, 0x40000000, 0x1000000};
+	static int efds[1] = {0};
+	struct n3000_afu_priv *priv;
+	struct dma_afu_ctx *dma;
+	uint32_t first_vec = 0;
+	int ret;
+
+	if (dev == NULL || index < 0 || index >= NUM_N3000_DMA || addr == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	dma = &priv->dma_ctx[index];
+	dma->index = index;
+	dma->addr = addr;
+	dma->csr_addr = addr + DMA_CSR;
+	dma->desc_addr = addr + DMA_DESC;
+	dma->ase_ctrl_addr = addr + DMA_ASE_CTRL;
+	dma->ase_data_addr = addr + DMA_ASE_DATA;
+	dma->mem_size = mem_sz[dma->index];
+	dma->cur_ase_page = INVALID_ASE_PAGE;
+
+	/* only the first controller creates and binds the event fd */
+	if (dma->index == 0) {
+		ret = n3000_afu_get_irq_capability(dev, &first_vec, NULL);
+		if (ret)
+			return ret;
+
+		efds[0] = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+		if (efds[0] < 0) {
+			AFU_MF_PMD_ERR("eventfd create failed");
+			return -EBADF;
+		}
+
+		if (dma_afu_set_irqs(dev, first_vec, 1, efds))
+			AFU_MF_PMD_ERR("DMA interrupt setup failed");
+	}
+	dma->event_fd = efds[0];
+
+	dma->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
+		sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
+	if (dma->desc_buf == NULL)
+		goto no_mem;
+
+	dma->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (dma->magic_buf == NULL)
+		goto no_mem;
+
+	dma->magic_iova = rte_malloc_virt2iova(dma->magic_buf);
+	if (dma->magic_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	return 0;
+
+no_mem:
+	dma_afu_ctx_release(dev);
+	return -ENOMEM;
+}
+
+/*
+ * Discover the functions of the N3000 AFU by walking the feature header
+ * list that starts at dev->addr.
+ *
+ * For every header the type and the 128-bit UUID are read. An AFU header
+ * with the NLB0 UUID initialises the NLB context; a BBB header with the DMA
+ * UUID initialises the next DMA context, as long as fewer than
+ * NUM_N3000_DMA have been found. Other headers are only logged. The walk
+ * ends at the header flagged end-of-list, or when the next offset is 0 or
+ * has its low 16 bits all set.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data, or the error of a failing context initialisation.
+ */
+static int n3000_afu_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv;
+	uint8_t *feature;
+	uint64_t dfh;
+	uint64_t uuid_hi;
+	uint64_t uuid_lo;
+	uint64_t step;
+	int is_nlb;
+	int is_dma;
+	int ret;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	feature = (uint8_t *)dev->addr;
+	for (;;) {
+		dfh = rte_read64(feature);
+		uuid_lo = rte_read64(feature + DFH_UUID_L_OFFSET);
+		uuid_hi = rte_read64(feature + DFH_UUID_H_OFFSET);
+
+		is_nlb = (DFH_TYPE(dfh) == DFH_TYPE_AFU) &&
+			(uuid_lo == N3000_NLB0_UUID_L) &&
+			(uuid_hi == N3000_NLB0_UUID_H);
+		is_dma = !is_nlb &&
+			(DFH_TYPE(dfh) == DFH_TYPE_BBB) &&
+			(uuid_lo == N3000_DMA_UUID_L) &&
+			(uuid_hi == N3000_DMA_UUID_H) &&
+			(priv->num_dma < NUM_N3000_DMA);
+
+		if (is_nlb) {
+			AFU_MF_PMD_INFO("AFU NLB0 found @ %p", (void *)feature);
+			ret = nlb_afu_ctx_init(dev, feature);
+			if (ret)
+				return ret;
+		} else if (is_dma) {
+			AFU_MF_PMD_INFO("AFU DMA%d found @ %p",
+				priv->num_dma, (void *)feature);
+			ret = dma_afu_ctx_init(dev, priv->num_dma, feature);
+			if (ret)
+				return ret;
+			priv->num_dma++;
+		} else {
+			AFU_MF_PMD_DEBUG("DFH: type %"PRIu64
+				", uuid %016"PRIx64"%016"PRIx64,
+				DFH_TYPE(dfh), uuid_hi, uuid_lo);
+		}
+
+		step = DFH_NEXT_OFFSET(dfh);
+		if (((step & 0xffff) == 0xffff) || (step == 0))
+			break;
+		if (DFH_EOL(dfh))
+			break;
+		feature += step;
+	}
+
+	return 0;
+}
+
+/*
+ * Initialise the N3000 AFU driver state of a raw device: allocate the
+ * zeroed private data when the device has none yet, then discover and
+ * initialise the AFU contexts.
+ *
+ * Returns the result of n3000_afu_ctx_init(), -EINVAL when dev is NULL, or
+ * -ENOMEM when the private data cannot be allocated.
+ */
+static int n3000_afu_init(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->priv != NULL)
+		return n3000_afu_ctx_init(dev);
+
+	dev->priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0);
+	if (dev->priv == NULL)
+		return -ENOMEM;
+
+	return n3000_afu_ctx_init(dev);
+}
+
+/*
+ * Driver config op: validate a user configuration and store it as the
+ * setup used by the next test run.
+ *
+ * dev          AFU device; its private data must already exist.
+ * config       points to a struct rte_pmd_afu_n3000_cfg.
+ * config_size  must equal sizeof(struct rte_pmd_afu_n3000_cfg).
+ *
+ * Returns 0 on success, -EINVAL for a missing argument, a size mismatch,
+ * an unknown type or an out-of-range field, and -ENOENT when the device
+ * has no private data.
+ *
+ * Side effect: for DMA index 3 the length inside the caller's structure
+ * is rounded down to a multiple of 64 bytes before it is stored.
+ */
+static int n3000_afu_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_n3000_cfg *cfg = NULL;
+	int i = 0;
+	uint64_t top = 0;
+
+	if (!dev || !config || !config_size)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
+		return -EINVAL;
+
+	cfg = (struct rte_pmd_afu_n3000_cfg *)config;
+	if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
+		/* only loopback mode is accepted */
+		if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
+			return -EINVAL;
+		if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
+			(cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
+			return -EINVAL;
+		if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
+			return -EINVAL;
+		if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
+			return -EINVAL;
+		if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
+			return -EINVAL;
+		if ((cfg->nlb_cfg.multi_cl != 1) &&
+			(cfg->nlb_cfg.multi_cl != 2) &&
+			(cfg->nlb_cfg.multi_cl != 4))
+			return -EINVAL;
+		/* MIN_CACHE_LINES <= begin <= end <= MAX_CACHE_LINES */
+		if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
+			(cfg->nlb_cfg.begin > MAX_CACHE_LINES))
+			return -EINVAL;
+		if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
+			(cfg->nlb_cfg.end > MAX_CACHE_LINES))
+			return -EINVAL;
+		rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
+			sizeof(struct rte_pmd_afu_nlb_cfg));
+	} else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
+		if (cfg->dma_cfg.index >= NUM_N3000_DMA)
+			return -EINVAL;
+		i = cfg->dma_cfg.index;
+		if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
+			return -EINVAL;
+		if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
+			return -EINVAL;
+		/*
+		 * Widen before adding: length and offset are both 32-bit, so
+		 * their sum could wrap and slip past the range check below.
+		 */
+		top = (uint64_t)cfg->dma_cfg.length + cfg->dma_cfg.offset;
+		if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
+			return -EINVAL;
+		if (i == 3) {  /* QDR connected to DMA3 */
+			if (cfg->dma_cfg.length & 0x3f) {
+				cfg->dma_cfg.length &= ~0x3f;
+				AFU_MF_PMD_INFO("Round size to %x for QDR",
+					cfg->dma_cfg.length);
+			}
+		}
+		rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
+			sizeof(struct rte_pmd_afu_dma_cfg));
+	} else {
+		AFU_MF_PMD_ERR("Invalid type of N3000 AFU");
+		return -EINVAL;
+	}
+
+	/* remember which of the two stored configurations is active */
+	priv->cfg_type = cfg->type;
+	return 0;
+}
+
+/*
+ * Driver test op: run the self test that matches the configuration type
+ * stored by n3000_afu_config().
+ *
+ * Returns -EINVAL for a NULL device or when no configuration has been
+ * stored, -ENOENT without private data, otherwise the test result.
+ */
+static int n3000_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	switch (priv->cfg_type) {
+	case RTE_PMD_AFU_N3000_NLB:
+		AFU_MF_PMD_INFO("Test NLB");
+		return nlb_afu_test(dev);
+	case RTE_PMD_AFU_N3000_DMA:
+		AFU_MF_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
+		return dma_afu_test(dev);
+	default:
+		AFU_MF_PMD_ERR("Please configure AFU before test");
+		return -EINVAL;
+	}
+}
+
+/*
+ * Driver close op: release the NLB and DMA contexts, then free the
+ * private data and clear the pointer so it cannot be reused.
+ *
+ * Returns 0, or -EINVAL for a NULL device.
+ */
+static int n3000_afu_close(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* the contexts are members of the private data, so release them first */
+	nlb_afu_ctx_release(dev);
+	dma_afu_ctx_release(dev);
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+	return 0;
+}
+
+/*
+ * Driver dump op: print the context of the currently configured
+ * sub-block (NLB, or the DMA block selected by dma_cfg.index).
+ *
+ * dev  AFU device.
+ * f    output stream; stdout is used when NULL.
+ *
+ * Returns 0 on success, -EINVAL for a NULL device or when nothing has
+ * been configured yet, -ENOENT without private data.
+ */
+static int n3000_afu_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct n3000_afu_priv *priv = NULL;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (!f)
+		f = stdout;
+
+	/*
+	 * IOVAs are 64-bit integers, not pointers: print them with PRIx64.
+	 * Casting them to void * truncates on 32-bit targets and triggers an
+	 * int-to-pointer size warning there.
+	 */
+	if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
+		struct nlb_afu_ctx *ctx = &priv->nlb_ctx;
+		fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
+		fprintf(f, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
+		fprintf(f, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
+		fprintf(f, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
+		fprintf(f, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
+		fprintf(f, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
+		fprintf(f, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
+		fprintf(f, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
+	} else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
+		struct dma_afu_ctx *ctx = &priv->dma_ctx[priv->dma_cfg.index];
+		fprintf(f, "index:\t\t%d\n", ctx->index);
+		fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
+		fprintf(f, "csr_addr:\t%p\n", (void *)ctx->csr_addr);
+		fprintf(f, "desc_addr:\t%p\n", (void *)ctx->desc_addr);
+		fprintf(f, "ase_ctrl_addr:\t%p\n", (void *)ctx->ase_ctrl_addr);
+		fprintf(f, "ase_data_addr:\t%p\n", (void *)ctx->ase_data_addr);
+		fprintf(f, "desc_buf:\t%p\n", (void *)ctx->desc_buf);
+		fprintf(f, "magic_buf:\t%p\n", (void *)ctx->magic_buf);
+		fprintf(f, "magic_iova:\t0x%"PRIx64"\n", ctx->magic_iova);
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Driver reset op: pulse the soft-reset bit in the port control register.
+ *
+ * Returns 0 on success, -ENOENT when the port address cannot be found.
+ */
+static int n3000_afu_reset(struct afu_mf_rawdev *dev)
+{
+	uint8_t *port;
+	uint8_t *ctrl;
+	uint64_t reg;
+
+	port = (uint8_t *)n3000_afu_get_port_addr(dev);
+	if (port == NULL)
+		return -ENOENT;
+
+	ctrl = port + PORT_CTRL_REG;
+
+	/* set the reset bit, hold it for 100 us, then clear it again */
+	reg = rte_read64(ctrl) | PORT_SOFT_RESET;
+	rte_write64(reg, ctrl);
+	rte_delay_us(100);
+	reg &= ~PORT_SOFT_RESET;
+	rte_write64(reg, ctrl);
+
+	return 0;
+}
+
+/*
+ * Operation table of the N3000 AFU driver. The start and stop ops are
+ * left unset; every other op is provided.
+ */
+static struct afu_mf_ops n3000_afu_ops = {
+	.init = n3000_afu_init,
+	.config = n3000_afu_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = n3000_afu_test,
+	.close = n3000_afu_close,
+	.dump = n3000_afu_dump,
+	.reset = n3000_afu_reset
+};
+
+/* Driver entry that binds the N3000 AFU UUID to its operation table. */
+struct afu_mf_drv n3000_afu_drv = {
+	.uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+	.ops = &n3000_afu_ops
+};
diff --git a/drivers/raw/afu_mf/n3000_afu.h b/drivers/raw/afu_mf/n3000_afu.h
new file mode 100644
index 0000000..4c740da
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _N3000_AFU_H_
+#define _N3000_AFU_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define N3000_AFU_UUID_L  0xc000c9660d824272
+#define N3000_AFU_UUID_H  0x9aeffe5f84570612
+#define N3000_NLB0_UUID_L 0xf89e433683f9040b
+#define N3000_NLB0_UUID_H 0xd8424dc4a4a3c413
+#define N3000_DMA_UUID_L  0xa9149a35bace01ea
+#define N3000_DMA_UUID_H  0xef82def7f6ec40fc
+
+extern struct afu_mf_drv n3000_afu_drv;
+
+#define NUM_N3000_DMA  4
+#define MAX_MSIX_VEC   7
+
+/* N3000 DFL definition */
+/* byte offsets of the UUID halves, relative to a feature header */
+#define DFH_UUID_L_OFFSET  8
+#define DFH_UUID_H_OFFSET  16
+/* field extractors for a 64-bit feature header value */
+#define DFH_TYPE(hdr)  (((hdr) >> 60) & 0xf)
+#define DFH_TYPE_AFU  1
+#define DFH_TYPE_BBB  2
+#define DFH_TYPE_PRIVATE  3
+#define DFH_EOL(hdr)  (((hdr) >> 40) & 0x1)
+#define DFH_NEXT_OFFSET(hdr)  (((hdr) >> 16) & 0xffffff)
+#define DFH_FEATURE_ID(hdr)  ((hdr) & 0xfff)
+#define PORT_ATTR_REG(n)  (((n) << 3) + 0x38)
+#define PORT_IMPLEMENTED(attr)  (((attr) >> 60) & 0x1)
+#define PORT_BAR(attr)  (((attr) >> 32) & 0x7)
+#define PORT_OFFSET(attr)  ((attr) & 0xffffff)
+#define PORT_FEATURE_UINT_ID  0x12
+#define PORT_UINT_CAP_REG  0x8
+/*
+ * User interrupt capability: first vector in bits [23:12], number of
+ * vectors in bits [11:0]. The count must not reuse the shift of the
+ * start field, otherwise both macros return the same value.
+ */
+#define PORT_VEC_START(cap)  (((cap) >> 12) & 0xfff)
+#define PORT_VEC_COUNT(cap)  ((cap) & 0xfff)
+#define PORT_CTRL_REG  0x38
+#define PORT_SOFT_RESET  (0x1 << 0)
+
+/* NLB registers definition */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+#define DSM_SIZE           0x200000
+#define DSM_STATUS         0x40
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+#define NLB_BUF_SIZE  0x400000
+#define TEST_MEM_ALIGN  1024
+
+/*
+ * 32-bit NLB control value, accessible as a whole through csr or bit by
+ * bit through the overlaid fields (1 + 1 + 1 + 29 = 32 bits).
+ * NOTE(review): presumably the image of the CSR_CTL register - confirm
+ * against the code that writes it.
+ */
+struct nlb_csr_ctl {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t reset:1;
+			uint32_t start:1;
+			uint32_t force_completion:1;
+			uint32_t reserved:29;
+		};
+	};
+};
+
+/*
+ * 32-bit NLB configuration value, accessible as a whole through csr or
+ * field by field; the field widths add up to exactly 32 bits.
+ * NOTE(review): presumably the image of the CSR_CFG register - confirm
+ * against the code that writes it.
+ */
+struct nlb_csr_cfg {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t wrthru_en:1;
+			uint32_t cont:1;
+			uint32_t mode:3;
+			uint32_t multicl_len:2;
+			uint32_t rsvd1:1;
+			uint32_t delay_en:1;
+			uint32_t rdsel:2;
+			uint32_t rsvd2:1;
+			uint32_t chsel:3;
+			uint32_t rsvd3:1;
+			uint32_t wrpush_i:1;
+			uint32_t wr_chsel:3;
+			uint32_t rsvd4:3;
+			uint32_t test_cfg:5;
+			uint32_t interrupt_on_error:1;
+			uint32_t interrupt_testmode:1;
+			uint32_t wrfence_chsel:2;
+		};
+	};
+};
+
+/*
+ * 64-bit status value split into two 32-bit counters.
+ * NOTE(review): presumably read from CSR_STATUS0 - confirm.
+ */
+struct nlb_status0 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_writes;
+			uint32_t num_reads;
+		};
+	};
+};
+
+/*
+ * 64-bit status value split into two 32-bit pending-request counters.
+ * NOTE(review): presumably read from CSR_STATUS1 - confirm.
+ */
+struct nlb_status1 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_pend_writes;
+			uint32_t num_pend_reads;
+		};
+	};
+};
+
+/*
+ * Test status record; struct nlb_afu_ctx keeps a pointer to one in
+ * status_ptr. With natural alignment the fields occupy 32 bytes.
+ * NOTE(review): presumably placed at offset DSM_STATUS of the DSM buffer
+ * and filled in by the AFU - confirm against the NLB test code.
+ */
+struct nlb_dsm_status {
+	uint32_t test_complete;
+	uint32_t test_error;
+	uint64_t num_clocks;
+	uint32_t num_reads;
+	uint32_t num_writes;
+	uint32_t start_overhead;
+	uint32_t end_overhead;
+};
+
+/* DMA registers definition */
+#define DMA_CSR       0x40
+#define DMA_DESC      0x60
+#define DMA_ASE_CTRL  0x200
+#define DMA_ASE_DATA  0x1000
+
+#define DMA_ASE_WINDOW       4096
+#define DMA_ASE_WINDOW_MASK  ((uint64_t)(DMA_ASE_WINDOW - 1))
+#define INVALID_ASE_PAGE     0xffffffffffffffffULL
+
+#define DMA_WF_MAGIC             0x5772745F53796E63ULL
+#define DMA_WF_MAGIC_ROM         0x1000000000000
+#define DMA_HOST_ADDR(addr)      ((addr) | 0x2000000000000)
+#define DMA_WF_HOST_ADDR(addr)   ((addr) | 0x3000000000000)
+
+#define NUM_DMA_BUF   8
+#define HALF_DMA_BUF  (NUM_DMA_BUF / 2)
+
+#define DMA_MASK_32_BIT 0xFFFFFFFF
+
+#define DMA_CSR_BUSY           0x1
+#define DMA_DESC_BUFFER_EMPTY  0x2
+#define DMA_DESC_BUFFER_FULL   0x4
+
+#define DWORD_BYTES 4
+#define IS_ALIGNED_DWORD(addr) (((addr) % DWORD_BYTES) == 0)
+
+#define QWORD_BYTES 8
+#define IS_ALIGNED_QWORD(addr) (((addr) % QWORD_BYTES) == 0)
+
+#define DMA_ALIGN_BYTES 64
+#define IS_DMA_ALIGNED(addr) (((addr) % DMA_ALIGN_BYTES) == 0)
+
+#define CCIP_ALIGN_BYTES (DMA_ALIGN_BYTES << 2)
+
+#define DMA_TIMEOUT_MSEC  5000
+
+#define MAGIC_BUF_SIZE  64
+#define ERR_CHECK_LIMIT  64
+
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/*
+ * Direction of a DMA transfer. FPGA_MAX_TRANSFER_TYPE is not a direction;
+ * it is the number of valid values.
+ */
+typedef enum {
+	HOST_TO_FPGA = 0,
+	FPGA_TO_HOST,
+	FPGA_TO_FPGA,
+	FPGA_MAX_TRANSFER_TYPE,
+} fpga_dma_type;
+
+/*
+ * Control word of a DMA descriptor (see msgdma_ext_desc), accessible as
+ * a 32-bit value or through bit-fields whose widths add up to 32.
+ */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t tx_channel:8;
+		uint32_t generate_sop:1;
+		uint32_t generate_eop:1;
+		uint32_t park_reads:1;
+		uint32_t park_writes:1;
+		uint32_t end_on_eop:1;
+		uint32_t reserved_1:1;
+		uint32_t transfer_irq_en:1;
+		uint32_t early_term_irq_en:1;
+		uint32_t trans_error_irq_en:8;
+		uint32_t early_done_en:1;
+		uint32_t reserved_2:6;
+		uint32_t go:1;
+	};
+} msgdma_desc_ctrl;
+
+/*
+ * Extended DMA descriptor. The structure is packed and its members add
+ * up to 32 bytes. Read and write addresses are each split into a low
+ * 32-bit part and an "_ext" part.
+ * NOTE(review): "_ext" presumably carries the upper 32 address bits -
+ * confirm against the code that fills the descriptor.
+ */
+typedef struct __rte_packed {
+	uint32_t rd_address;
+	uint32_t wr_address;
+	uint32_t len;
+	uint16_t seq_num;
+	uint8_t rd_burst_count;
+	uint8_t wr_burst_count;
+	uint16_t rd_stride;
+	uint16_t wr_stride;
+	uint32_t rd_address_ext;
+	uint32_t wr_address_ext;
+	msgdma_desc_ctrl control;
+} msgdma_ext_desc;
+
+/*
+ * DMA status word (first member of msgdma_csr), accessible as a 32-bit
+ * value or through bit-fields: 10 flag bits followed by 22 reserved bits.
+ */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t busy:1;
+		uint32_t desc_buf_empty:1;
+		uint32_t desc_buf_full:1;
+		uint32_t rsp_buf_empty:1;
+		uint32_t rsp_buf_full:1;
+		uint32_t stopped:1;
+		uint32_t resetting:1;
+		uint32_t stopped_on_error:1;
+		uint32_t stopped_on_early_term:1;
+		uint32_t irq:1;
+		uint32_t reserved:22;
+	};
+} msgdma_status;
+
+/*
+ * DMA control word (second member of msgdma_csr), accessible as a 32-bit
+ * value or through bit-fields: 6 control bits followed by 26 reserved
+ * bits, so that the fields cover the whole word.
+ */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t stop_dispatcher:1;
+		uint32_t reset_dispatcher:1;
+		uint32_t stop_on_error:1;
+		uint32_t stopped_on_early_term:1;
+		uint32_t global_intr_en_mask:1;
+		uint32_t stop_descriptors:1;
+		uint32_t reserved:26;
+	};
+} msgdma_ctrl;
+
+/* Fill-level word of msgdma_csr: two 16-bit levels in one 32-bit value. */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rd_fill_level:16;
+		uint32_t wr_fill_level:16;
+	};
+} msgdma_fill_level;
+
+/* Response word of msgdma_csr: a 16-bit level plus 16 reserved bits. */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rsp_fill_level:16;
+		uint32_t reserved:16;
+	};
+} msgdma_rsp_level;
+
+/* Sequence-number word of msgdma_csr: two 16-bit numbers in 32 bits. */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rd_seq_num:16;
+		uint32_t wr_seq_num:16;
+	};
+} msgdma_seq_num;
+
+/*
+ * Layout of the DMA control/status block: five consecutive 32-bit words
+ * (20 bytes, packed). CSR_STATUS() and CSR_CONTROL() take the address of
+ * the first two members through a pointer to this type.
+ */
+typedef struct __rte_packed {
+	msgdma_status status;
+	msgdma_ctrl ctrl;
+	msgdma_fill_level fill_level;
+	msgdma_rsp_level rsp;
+	msgdma_seq_num seq_num;
+} msgdma_csr;
+
+#define CSR_STATUS(csr)   (&(((msgdma_csr *)(csr))->status))
+#define CSR_CONTROL(csr)  (&(((msgdma_csr *)(csr))->ctrl))
+
+/*
+ * Run-time context of the NLB sub-block; n3000_afu_dump() prints every
+ * member. Each *_ptr buffer has a matching 64-bit *_iova value.
+ * NOTE(review): the *_iova members presumably hold the IO addresses
+ * programmed into the AFU for the corresponding buffers - confirm.
+ */
+struct nlb_afu_ctx {
+	uint8_t *addr;
+	uint8_t *dsm_ptr;
+	uint64_t dsm_iova;
+	uint8_t *src_ptr;
+	uint64_t src_iova;
+	uint8_t *dest_ptr;
+	uint64_t dest_iova;
+	struct nlb_dsm_status *status_ptr;
+};
+
+/*
+ * Run-time context of one DMA sub-block; struct n3000_afu_priv holds
+ * NUM_N3000_DMA of them.
+ */
+struct dma_afu_ctx {
+	int index;
+	uint8_t *addr;
+	uint8_t *csr_addr;
+	uint8_t *desc_addr;
+	uint8_t *ase_ctrl_addr;
+	uint8_t *ase_data_addr;
+	/* upper bound used by n3000_afu_config() for length and offset */
+	uint64_t mem_size;
+	uint64_t cur_ase_page;
+	int event_fd;
+	int verbose;
+	int pattern;
+	void *data_buf;
+	void *ref_buf;
+	/* one zeroed descriptor, allocated with DMA_ALIGN_BYTES alignment */
+	msgdma_ext_desc *desc_buf;
+	/* MAGIC_BUF_SIZE bytes, allocated with TEST_MEM_ALIGN alignment */
+	uint64_t *magic_buf;
+	/* IOVA of magic_buf, obtained with rte_malloc_virt2iova() */
+	uint64_t magic_iova;
+	uint32_t dma_buf_size;
+	uint64_t *dma_buf[NUM_DMA_BUF];
+	uint64_t dma_iova[NUM_DMA_BUF];
+};
+
+/*
+ * Private data of the N3000 AFU driver, allocated zeroed by
+ * n3000_afu_init() and freed by n3000_afu_close().
+ */
+struct n3000_afu_priv {
+	/* copies of the last accepted NLB and DMA configurations */
+	struct rte_pmd_afu_nlb_cfg nlb_cfg;
+	struct rte_pmd_afu_dma_cfg dma_cfg;
+	struct nlb_afu_ctx nlb_ctx;
+	struct dma_afu_ctx dma_ctx[NUM_N3000_DMA];
+	/* number of DMA blocks found while walking the feature list */
+	int num_dma;
+	/*
+	 * RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA once configured;
+	 * the initial 0 makes test and dump fail with -EINVAL
+	 */
+	int cfg_type;
+};
+
+#endif /* _N3000_AFU_H_ */
diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h
new file mode 100644
index 0000000..89d866a
--- /dev/null
+++ b/drivers/raw/afu_mf/rte_pmd_afu.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __RTE_PMD_AFU_H__
+#define __RTE_PMD_AFU_H__
+
+/**
+ * @file rte_pmd_afu.h
+ *
+ * AFU PMD specific definitions.
+ *
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define RTE_PMD_AFU_N3000_NLB   1
+#define RTE_PMD_AFU_N3000_DMA   2
+
+#define NLB_MODE_LPBK      0
+#define NLB_MODE_READ      1
+#define NLB_MODE_WRITE     2
+#define NLB_MODE_TRPUT     3
+
+#define NLB_VC_AUTO        0
+#define NLB_VC_VL0         1
+#define NLB_VC_VH0         2
+#define NLB_VC_VH1         3
+#define NLB_VC_RANDOM      4
+
+#define NLB_WRLINE_M       0
+#define NLB_WRLINE_I       1
+#define NLB_WRPUSH_I       2
+
+#define NLB_RDLINE_S       0
+#define NLB_RDLINE_I       1
+#define NLB_RDLINE_MIXED   2
+
+#define MIN_CACHE_LINES   1
+#define MAX_CACHE_LINES   1024
+
+#define MIN_DMA_BUF_SIZE  64
+#define MAX_DMA_BUF_SIZE  (1023 * 1024)
+
+/**
+ * NLB AFU configuration data structure.
+ */
+struct rte_pmd_afu_nlb_cfg {
+	/* NLB_MODE_*; the N3000 driver accepts only NLB_MODE_LPBK */
+	uint32_t mode;
+	/* MIN_CACHE_LINES <= begin <= end <= MAX_CACHE_LINES */
+	uint32_t begin;
+	uint32_t end;
+	/* the N3000 driver accepts 1, 2 or 4 */
+	uint32_t multi_cl;
+	uint32_t cont;
+	uint32_t timeout;
+	/* NLB_WRLINE_M, NLB_WRLINE_I or NLB_WRPUSH_I */
+	uint32_t cache_policy;
+	/* NLB_RDLINE_S, NLB_RDLINE_I or NLB_RDLINE_MIXED */
+	uint32_t cache_hint;
+	/* NLB_VC_AUTO .. NLB_VC_RANDOM */
+	uint32_t read_vc;
+	uint32_t write_vc;
+	/* NLB_VC_AUTO .. NLB_VC_VH1 (NLB_VC_RANDOM is rejected) */
+	uint32_t wrfence_vc;
+	uint32_t freq_mhz;
+};
+
+/**
+ * DMA AFU configuration data structure.
+ */
+/*
+ * The N3000 driver rejects an index of 4 or more, a length or offset
+ * beyond the memory size of the selected controller, and a zero or
+ * out-of-range length + offset. For index 3 it rounds length down to a
+ * multiple of 64 in place.
+ */
+struct rte_pmd_afu_dma_cfg {
+	uint32_t index;     /* index of DMA controller */
+	uint32_t length;    /* total length of data to DMA */
+	uint32_t offset;    /* address offset of target memory */
+	uint32_t size;      /* size of transfer buffer */
+	uint32_t pattern;   /* data pattern to fill in test buffer */
+	uint32_t unaligned; /* use unaligned address or length in sweep test */
+	uint32_t verbose;   /* enable verbose error information in test */
+};
+
+/**
+ * N3000 AFU configuration data structure.
+ */
+struct rte_pmd_afu_n3000_cfg {
+	int type;   /* RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA */
+	/* only the member selected by type is read by the driver */
+	union {
+		struct rte_pmd_afu_nlb_cfg nlb_cfg;
+		struct rte_pmd_afu_dma_cfg dma_cfg;
+	};
+};
+
+/**
+ * HE-LBK & HE-MEM-LBK AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_lbk_cfg {
+	uint32_t mode;
+	uint32_t begin;
+	uint32_t end;
+	uint32_t multi_cl;
+	uint32_t cont;
+	uint32_t timeout;
+	uint32_t trput_interleave;
+	uint32_t freq_mhz;
+};
+
+/**
+ * HE-MEM-TG AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_mem_tg_cfg {
+	uint32_t channel_mask;   /* mask of traffic generator channel */
+};
+
+/**
+ * HE-HSSI AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_hssi_cfg {
+	uint32_t port;
+	uint32_t timeout;
+	uint32_t num_packets;
+	uint32_t random_length;
+	uint32_t packet_length;
+	uint32_t random_payload;
+	uint32_t rnd_seed[3];
+	uint64_t src_addr;
+	uint64_t dest_addr;
+	int he_loopback;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PMD_AFU_H__ */
diff --git a/drivers/raw/afu_mf/version.map b/drivers/raw/afu_mf/version.map
new file mode 100644
index 0000000..c2e0723
--- /dev/null
+++ b/drivers/raw/afu_mf/version.map
@@ -0,0 +1,3 @@
+DPDK_22 {
+	local: *;
+};
diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build
index 05e7de1..c3627f7 100644
--- a/drivers/raw/meson.build
+++ b/drivers/raw/meson.build
@@ -6,6 +6,7 @@ if is_windows
 endif
 
 drivers = [
+        'afu_mf',
         'cnxk_bphy',
         'cnxk_gpio',
         'dpaa2_cmdif',
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3] raw/afu_mf: introduce AFU MF device driver
  2022-05-17  7:34 ` [PATCH v2] " Wei Huang
@ 2022-05-19  2:43   ` Wei Huang
  2022-05-19  5:52     ` [PATCH v4] " Wei Huang
  0 siblings, 1 reply; 57+ messages in thread
From: Wei Huang @ 2022-05-19  2:43 UTC (permalink / raw)
  To: dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, rosen.xu, tianfei.zhang, qi.z.zhang, Wei Huang

Add the afu_mf driver to manage the various AFUs (Acceleration Function
Units) in an FPGA.

Signed-off-by: Wei Huang <wei.huang@intel.com>
Acked-by: Tianfei Zhang <tianfei.zhang@intel.com>
---
v2: fix typo
---
v3: fix build error in FreeBSD13-64, UB2004-32 and UB2204-32
---
 drivers/raw/afu_mf/afu_mf_rawdev.c |  440 ++++++++
 drivers/raw/afu_mf/afu_mf_rawdev.h |   91 ++
 drivers/raw/afu_mf/he_hssi.c       |  369 +++++++
 drivers/raw/afu_mf/he_hssi.h       |  102 ++
 drivers/raw/afu_mf/he_lbk.c        |  430 ++++++++
 drivers/raw/afu_mf/he_lbk.h        |  121 +++
 drivers/raw/afu_mf/he_mem.c        |  181 ++++
 drivers/raw/afu_mf/he_mem.h        |   40 +
 drivers/raw/afu_mf/meson.build     |    8 +
 drivers/raw/afu_mf/n3000_afu.c     | 2007 ++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
 drivers/raw/afu_mf/rte_pmd_afu.h   |  134 +++
 drivers/raw/afu_mf/version.map     |    3 +
 drivers/raw/meson.build            |    1 +
 14 files changed, 4260 insertions(+)
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
 create mode 100644 drivers/raw/afu_mf/he_hssi.c
 create mode 100644 drivers/raw/afu_mf/he_hssi.h
 create mode 100644 drivers/raw/afu_mf/he_lbk.c
 create mode 100644 drivers/raw/afu_mf/he_lbk.h
 create mode 100644 drivers/raw/afu_mf/he_mem.c
 create mode 100644 drivers/raw/afu_mf/he_mem.h
 create mode 100644 drivers/raw/afu_mf/meson.build
 create mode 100644 drivers/raw/afu_mf/n3000_afu.c
 create mode 100644 drivers/raw/afu_mf/n3000_afu.h
 create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h
 create mode 100644 drivers/raw/afu_mf/version.map

diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c
new file mode 100644
index 0000000..f24c748
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
@@ -0,0 +1,440 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_rawdev_pmd.h>
+
+#include "rte_pmd_afu.h"
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+#include "he_lbk.h"
+#include "he_mem.h"
+#include "he_hssi.h"
+
+#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
+
+/*
+ * UUIDs of the AFUs handled by this PMD, used as the id_table of the AFU
+ * driver registration. The all-zero entry terminates the table.
+ */
+static const struct rte_afu_uuid afu_uuid_map[] = {
+	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
+	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+	{ HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+	{ HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+	{ 0, 0 /* sentinel */ }
+};
+
+/*
+ * NULL-terminated list of the per-AFU drivers; afu_mf_ops_get() searches
+ * it by UUID to find the operation table of a device.
+ */
+static struct afu_mf_drv *afu_table[] = {
+	&n3000_afu_drv,
+	&he_lbk_drv,
+	&he_mem_lbk_drv,
+	&he_mem_tg_drv,
+	&he_hssi_drv,
+	NULL
+};
+
+/*
+ * Try to take the per-device lock kept in the shared data, without
+ * blocking.
+ *
+ * Returns 0 when the lock was taken, -EBUSY when it is already held and
+ * -ENODEV when the device or its shared data is missing.
+ */
+static inline int afu_mf_trylock(struct afu_mf_rawdev *dev)
+{
+	int32_t x = 0;
+
+	if (!dev || !dev->shared)
+		return -ENODEV;
+
+	/* cheap check first: skip the atomic exchange when already locked */
+	x = __atomic_load_n(&dev->shared->lock, __ATOMIC_RELAXED);
+
+	/*
+	 * Use the strong compare-exchange (weak = 0). There is no retry
+	 * loop here, so a spurious failure of the weak form would be
+	 * reported as -EBUSY although the lock is free.
+	 */
+	if ((x != 0) || (__atomic_compare_exchange_n(&dev->shared->lock, &x, 1,
+				0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) == 0))
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * Release the per-device lock with release ordering. Does nothing when
+ * the device or its shared data is missing.
+ */
+static inline void afu_mf_unlock(struct afu_mf_rawdev *dev)
+{
+	if (dev != NULL && dev->shared != NULL)
+		__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE);
+}
+
+/*
+ * rawdev configure callback: forward the configuration to the config op
+ * of the AFU driver. Returns 0 when the driver has no config op.
+ */
+static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
+	rte_rawdev_obj_t config, size_t config_size)
+{
+	struct afu_mf_rawdev *dev;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return -ENODEV;
+
+	if (dev->ops == NULL || dev->ops->config == NULL)
+		return 0;
+
+	return dev->ops->config(dev, config, config_size);
+}
+
+/*
+ * rawdev start callback: run the start op of the AFU driver while
+ * holding the device lock. Returns 0 when the driver has no start op,
+ * or the trylock error when the lock cannot be taken.
+ */
+static int afu_mf_rawdev_start(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *dev;
+	int status;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return -ENODEV;
+
+	status = afu_mf_trylock(dev);
+	if (status != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please start it later");
+		return status;
+	}
+
+	/* status is 0 here, so a missing op reports success */
+	if (dev->ops != NULL && dev->ops->start != NULL)
+		status = dev->ops->start(dev);
+
+	afu_mf_unlock(dev);
+	return status;
+}
+
+/*
+ * rawdev stop callback: run the stop op of the AFU driver while holding
+ * the device lock. Nothing is done when the lock cannot be taken.
+ */
+static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *dev;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return;
+
+	if (afu_mf_trylock(dev) != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please stop it later");
+		return;
+	}
+
+	/* this callback returns void, so the result of the op is dropped */
+	if (dev->ops != NULL && dev->ops->stop != NULL)
+		(void)dev->ops->stop(dev);
+
+	afu_mf_unlock(dev);
+}
+
+/*
+ * rawdev close callback: forward to the close op of the AFU driver.
+ * Returns 0 when the driver has no close op.
+ */
+static int afu_mf_rawdev_close(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *dev;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return -ENODEV;
+
+	if (dev->ops == NULL || dev->ops->close == NULL)
+		return 0;
+
+	return dev->ops->close(dev);
+}
+
+/*
+ * rawdev reset callback: run the reset op of the AFU driver while
+ * holding the device lock. Returns 0 when the driver has no reset op,
+ * or the trylock error when the lock cannot be taken.
+ */
+static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *dev;
+	int status;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return -ENODEV;
+
+	status = afu_mf_trylock(dev);
+	if (status != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please reset it later");
+		return status;
+	}
+
+	/* status is 0 here, so a missing op reports success */
+	if (dev->ops != NULL && dev->ops->reset != NULL)
+		status = dev->ops->reset(dev);
+
+	afu_mf_unlock(dev);
+	return status;
+}
+
+/*
+ * rawdev selftest callback: look the device up by id and run the test
+ * op of its AFU driver while holding the device lock.
+ *
+ * Returns -ENODEV for an invalid id, -ENOENT when the raw device has no
+ * private data, the trylock error when the lock cannot be taken, 0 when
+ * the driver has no test op, otherwise the test result.
+ */
+static int afu_mf_rawdev_selftest(uint16_t dev_id)
+{
+	struct afu_mf_rawdev *dev;
+	int status;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	if (!rte_rawdev_pmd_is_valid_dev(dev_id))
+		return -ENODEV;
+
+	dev = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
+	if (dev == NULL)
+		return -ENOENT;
+
+	status = afu_mf_trylock(dev);
+	if (status != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please test it later");
+		return status;
+	}
+
+	/* status is 0 here, so a missing op reports success */
+	if (dev->ops != NULL && dev->ops->test != NULL)
+		status = dev->ops->test(dev);
+
+	afu_mf_unlock(dev);
+	return status;
+}
+
+/*
+ * rawdev dump callback: forward to the dump op of the AFU driver.
+ * Returns 0 when the driver has no dump op.
+ */
+static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f)
+{
+	struct afu_mf_rawdev *dev;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return -ENODEV;
+
+	if (dev->ops == NULL || dev->ops->dump == NULL)
+		return 0;
+
+	return dev->ops->dump(dev, f);
+}
+
+/*
+ * rawdev callback table of this PMD. Only configure, start, stop, close,
+ * reset, dump and selftest are implemented; queue, attribute, buffer,
+ * xstats and firmware callbacks are left unset.
+ */
+static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
+	.dev_info_get = NULL,
+	.dev_configure = afu_mf_rawdev_configure,
+	.dev_start = afu_mf_rawdev_start,
+	.dev_stop = afu_mf_rawdev_stop,
+	.dev_close = afu_mf_rawdev_close,
+	.dev_reset = afu_mf_rawdev_reset,
+
+	.queue_def_conf = NULL,
+	.queue_setup = NULL,
+	.queue_release = NULL,
+	.queue_count = NULL,
+
+	.attr_get = NULL,
+	.attr_set = NULL,
+
+	.enqueue_bufs = NULL,
+	.dequeue_bufs = NULL,
+
+	.dump = afu_mf_rawdev_dump,
+
+	.xstats_get = NULL,
+	.xstats_get_names = NULL,
+	.xstats_get_by_name = NULL,
+	.xstats_reset = NULL,
+
+	.firmware_status_get = NULL,
+	.firmware_version_get = NULL,
+	.firmware_load = NULL,
+	.firmware_unload = NULL,
+
+	.dev_selftest = afu_mf_rawdev_selftest,
+};
+
+/*
+ * Find or create the memory zone that holds the data shared between
+ * processes for one device, and return a pointer to it through data.
+ * The lock is cleared only when the zone was not found by the lookup.
+ *
+ * Returns 0 on success, -EINVAL for a NULL argument, -ENOMEM when the
+ * zone can be neither found nor reserved.
+ */
+static int
+afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
+	int socket_id)
+{
+	char zone_name[RTE_MEMZONE_NAMESIZE];
+	const struct rte_memzone *zone;
+	struct afu_mf_shared *shared;
+	int fresh = 0;
+
+	if (name == NULL || data == NULL)
+		return -EINVAL;
+
+	/* name format is afu_?|??:??.? which is unique */
+	snprintf(zone_name, sizeof(zone_name), "%s", name);
+
+	zone = rte_memzone_lookup(zone_name);
+	if (zone == NULL) {
+		zone = rte_memzone_reserve(zone_name,
+				sizeof(struct afu_mf_shared),
+				socket_id, 0);
+		fresh = 1;
+	}
+	if (zone == NULL) {
+		AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
+			zone_name);
+		return -ENOMEM;
+	}
+
+	shared = zone->addr;
+	if (fresh)  /* initialize memory zone on the first time */
+		shared->lock = 0;
+
+	*data = shared;
+	return 0;
+}
+
+/*
+ * Build the raw device name "afu_<AFU device name>" in the caller's
+ * buffer.
+ *
+ * Returns 0 on success, -EINVAL for a missing argument or a formatting
+ * error, -ENAMETOOLONG when the name does not fit into size bytes.
+ */
+static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev, char *name,
+	size_t size)
+{
+	int n = 0;
+
+	if (!afu_dev || !name || !size)
+		return -EINVAL;
+
+	n = snprintf(name, size, "afu_%s", afu_dev->device.name);
+	/* a negative result is an output error, not a valid length */
+	if (n < 0) {
+		AFU_MF_PMD_ERR("Failed to format name of AFU device");
+		return -EINVAL;
+	}
+	if ((size_t)n >= size) {
+		AFU_MF_PMD_ERR("Name of AFU device is too long!");
+		return -ENAMETOOLONG;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the operation table of the driver in afu_table whose UUID
+ * equals afu_id, or NULL when afu_id is NULL or no driver matches.
+ */
+static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id)
+{
+	struct afu_mf_drv *drv;
+	int idx;
+
+	if (afu_id == NULL)
+		return NULL;
+
+	for (idx = 0; afu_table[idx] != NULL; idx++) {
+		drv = afu_table[idx];
+		if (drv->uuid.uuid_low == afu_id->uuid_low &&
+			drv->uuid.uuid_high == afu_id->uuid_high)
+			return drv->ops;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create the raw device for an AFU: allocate it, bind the operation
+ * table that matches the AFU UUID, run the driver init op and attach the
+ * data shared between processes.
+ *
+ * afu_dev    AFU device reported by the bus.
+ * socket_id  NUMA node used for the allocations.
+ *
+ * Returns 0 on success or a negative errno value. On every failure after
+ * the allocation the raw device is released again.
+ */
+static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int socket_id)
+{
+	struct rte_rawdev *rawdev = NULL;
+	struct afu_mf_rawdev *dev = NULL;
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	int ret = 0;
+
+	if (!afu_dev)
+		return -EINVAL;
+
+	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (ret)
+		return ret;
+
+	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
+		name, socket_id);
+
+	/* Allocate device structure */
+	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct afu_mf_rawdev),
+				socket_id);
+	if (!rawdev) {
+		AFU_MF_PMD_ERR("Unable to allocate raw device");
+		return -ENOMEM;
+	}
+
+	rawdev->dev_ops = &afu_mf_rawdev_ops;
+	rawdev->device = &afu_dev->device;
+	rawdev->driver_name = afu_dev->driver->driver.name;
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (!dev) {
+		/* ret is still 0 here; do not report success */
+		ret = -ENODEV;
+		goto cleanup;
+	}
+
+	dev->rawdev = rawdev;
+	dev->port = afu_dev->id.port;
+	dev->addr = afu_dev->mem_resource[0].addr;
+	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
+	if (dev->ops == NULL) {
+		AFU_MF_PMD_ERR("Unsupported AFU device");
+		/* ret is still 0 here; do not report success */
+		ret = -ENOTSUP;
+		goto cleanup;
+	}
+
+	if (dev->ops->init) {
+		ret = (*dev->ops->init)(dev);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed to init %s", name);
+			goto cleanup;
+		}
+	}
+
+	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
+	if (ret)
+		goto cleanup;
+
+	return ret;
+
+cleanup:
+	rte_rawdev_pmd_release(rawdev);
+	return ret;
+}
+
+/*
+ * Release the raw device that belongs to an AFU. A failing release is
+ * only logged; the function still returns 0 in that case.
+ *
+ * Returns -EINVAL for a NULL device or when no raw device of that name
+ * exists, or the error of the name lookup.
+ */
+static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev)
+{
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	struct rte_rawdev *rawdev;
+	int status;
+
+	if (afu_dev == NULL)
+		return -EINVAL;
+
+	status = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (status != 0)
+		return status;
+
+	AFU_MF_PMD_INFO("Destroy raw device %s", name);
+
+	rawdev = rte_rawdev_pmd_get_named_dev(name);
+	if (rawdev == NULL) {
+		AFU_MF_PMD_ERR("Raw device %s not found", name);
+		return -EINVAL;
+	}
+
+	/* rte_rawdev_close is called by pmd_release */
+	if (rte_rawdev_pmd_release(rawdev) != 0)
+		AFU_MF_PMD_DEBUG("Device cleanup failed");
+
+	return 0;
+}
+
+/* Bus probe callback: create the raw device on the caller's NUMA node. */
+static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev)
+{
+	int node;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	node = rte_socket_id();
+	return afu_mf_rawdev_create(afu_dev, node);
+}
+
+/* Bus remove callback: destroy the raw device of the AFU. */
+static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev)
+{
+	int status;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	status = afu_mf_rawdev_destroy(afu_dev);
+	return status;
+}
+
+/*
+ * AFU bus driver of this PMD: the UUIDs it handles plus its probe and
+ * remove callbacks. Registered with RTE_PMD_REGISTER_AFU below.
+ */
+static struct rte_afu_driver afu_mf_pmd_drv = {
+	.id_table = afu_uuid_map,
+	.probe = afu_mf_rawdev_probe,
+	.remove = afu_mf_rawdev_remove
+};
+
+RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME, afu_mf_pmd_drv);
+RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);
diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h b/drivers/raw/afu_mf/afu_mf_rawdev.h
new file mode 100644
index 0000000..610a103
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __AFU_MF_RAWDEV_H__
+#define __AFU_MF_RAWDEV_H__
+
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+extern int afu_mf_pmd_logtype;
+
+#define AFU_MF_PMD_LOG(level, fmt, args...) \
+	rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n", \
+		__func__, ##args)
+
+#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG, ">>")
+
+#define AFU_MF_PMD_DEBUG(fmt, args...) \
+	AFU_MF_PMD_LOG(DEBUG, fmt, ## args)
+#define AFU_MF_PMD_INFO(fmt, args...) \
+	AFU_MF_PMD_LOG(INFO, fmt, ## args)
+#define AFU_MF_PMD_ERR(fmt, args...) \
+	AFU_MF_PMD_LOG(ERR, fmt, ## args)
+#define AFU_MF_PMD_WARN(fmt, args...) \
+	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
+
+#ifndef CACHE_LINE_SIZE
+#define CACHE_LINE_SIZE(n)  ((n) << 6)
+#endif
+#define CACHE_LINE_ALIGNED(n)  ((n) >> 6)
+#define MHZ(f)  ((f) * 1000000)
+
+/*
+ * Poll *addr until cond becomes true or the timeout has elapsed.
+ *
+ * addr     location to read on every iteration
+ * val      lvalue that receives each value read; cond normally tests it
+ * cond     expression evaluated after every read and once more at the end
+ * invl     delay between reads, in milliseconds
+ * timeout  total time budget, in milliseconds
+ *
+ * Evaluates to 0 when cond is true at the end and to 1 otherwise.
+ * An invl of 0 never advances the elapsed time, so the loop ends only
+ * when cond becomes true.
+ */
+#define dsm_poll_timeout(addr, val, cond, invl, timeout) \
+({                                                       \
+	uint64_t __wait = 0;                                 \
+	uint64_t __invl = (invl);                            \
+	uint64_t __timeout = (timeout);                      \
+	for (; __wait <= __timeout; __wait += __invl) {      \
+		(val) = *(addr);                                 \
+		if (cond)                                        \
+			break;                                       \
+		rte_delay_ms(__invl);                            \
+	}                                                    \
+	(cond) ? 0 : 1;                                      \
+})
+
+struct afu_mf_rawdev;
+
+/*
+ * Operations an AFU driver may provide; any member may be NULL and is
+ * then skipped by the rawdev callbacks. init is called while the raw
+ * device is created. start, stop, test and reset are called with the
+ * device lock held; config, close and dump are called without it.
+ */
+struct afu_mf_ops {
+	int (*init)(struct afu_mf_rawdev *dev);
+	int (*config)(struct afu_mf_rawdev *dev, void *config,
+		size_t config_size);
+	int (*start)(struct afu_mf_rawdev *dev);
+	int (*stop)(struct afu_mf_rawdev *dev);
+	int (*test)(struct afu_mf_rawdev *dev);
+	int (*close)(struct afu_mf_rawdev *dev);
+	int (*reset)(struct afu_mf_rawdev *dev);
+	int (*dump)(struct afu_mf_rawdev *dev, FILE *f);
+};
+
+/* Binds the UUID of one AFU type to the operations that drive it. */
+struct afu_mf_drv {
+	struct rte_afu_uuid uuid;
+	struct afu_mf_ops *ops;
+};
+
+/*
+ * Per-device data kept in a named memory zone.
+ * lock: 0 when free, 1 while held; accessed with atomic builtins only.
+ */
+struct afu_mf_shared {
+	int32_t lock;
+};
+
+/*
+ * Per-device state of this PMD, stored as the private data of the raw
+ * device (see afu_mf_rawdev_get_priv()).
+ */
+struct afu_mf_rawdev {
+	struct rte_rawdev *rawdev;  /* point to parent raw device */
+	struct afu_mf_shared *shared;  /* shared data for multi-process */
+	struct afu_mf_ops *ops;  /* device operation functions */
+	int port;  /* index of port the AFU attached */
+	void *addr;  /* base address of AFU registers */
+	void *priv;  /* private driver data */
+};
+
+/*
+ * Return the PMD state stored in a raw device, or NULL when rawdev is
+ * NULL.
+ */
+static inline struct afu_mf_rawdev *
+afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev)
+{
+	if (rawdev == NULL)
+		return NULL;
+
+	return (struct afu_mf_rawdev *)rawdev->dev_private;
+}
+
+#endif /* __AFU_MF_RAWDEV_H__ */
diff --git a/drivers/raw/afu_mf/he_hssi.c b/drivers/raw/afu_mf/he_hssi.c
new file mode 100644
index 0000000..bedafbd
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_hssi.h"
+
+/*
+ * Write 'value' to the indirectly addressed HE-HSSI register 'addr'.
+ *
+ * The value is placed in TRAFFIC_CTRL_DATA, a write command for 'addr' is
+ * issued through TRAFFIC_CTRL_CMD, and the function waits for ack_trans to
+ * be set. The acknowledge is then cleared by writing ack_trans back until
+ * it reads as 0.
+ *
+ * Returns 0 on success, -EINVAL if ctx is NULL, or -ETIMEDOUT when either
+ * wait exceeds MAILBOX_TIMEOUT_MS.
+ */
+static int he_hssi_indirect_write(struct he_hssi_ctx *ctx, uint32_t addr,
+	uint32_t value)
+{
+	struct traffic_ctrl_cmd cmd;
+	struct traffic_ctrl_data data;
+	uint32_t i = 0;
+
+	AFU_MF_PMD_DEBUG("Indirect write 0x%x, value 0x%08x", addr, value);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* zero the whole register image, read_data must not go out as garbage */
+	data.csr = 0;
+	data.write_data = value;
+	rte_write64(data.csr, ctx->addr + TRAFFIC_CTRL_DATA);
+
+	cmd.csr = 0;
+	cmd.write_cmd = 1;
+	cmd.afu_cmd_addr = addr;
+	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+	/* wait for the command to be acknowledged */
+	while (i < MAILBOX_TIMEOUT_MS) {
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	/* write ack_trans back until the register reports it cleared */
+	i = 0;
+	cmd.csr = 0;
+	while (i < MAILBOX_TIMEOUT_MS) {
+		cmd.ack_trans = 1;
+		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (!cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/*
+ * Read the indirectly addressed HE-HSSI register 'addr' into '*value'.
+ *
+ * A read command is issued through TRAFFIC_CTRL_CMD; once ack_trans is set
+ * the result is taken from TRAFFIC_CTRL_DATA. Afterwards ack_trans is
+ * written back until it reads as 0.
+ *
+ * Returns 0 on success, -EINVAL if ctx is NULL, or -ETIMEDOUT when either
+ * wait exceeds MAILBOX_TIMEOUT_MS.
+ */
+static int he_hssi_indirect_read(struct he_hssi_ctx *ctx, uint32_t addr,
+	uint32_t *value)
+{
+	struct traffic_ctrl_cmd req;
+	struct traffic_ctrl_data resp;
+	uint32_t elapsed;
+
+	if (ctx == NULL)
+		return -EINVAL;
+
+	req.csr = 0;
+	req.read_cmd = 1;
+	req.afu_cmd_addr = addr;
+	rte_write64(req.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+	/* wait for the acknowledge, then fetch the data */
+	for (elapsed = 0; elapsed < MAILBOX_TIMEOUT_MS;
+		elapsed += MAILBOX_POLL_INTERVAL_MS) {
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		req.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (req.ack_trans) {
+			resp.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_DATA);
+			*value = resp.read_data;
+			break;
+		}
+	}
+	if (elapsed >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	/* write ack_trans back until the register reports it cleared */
+	req.csr = 0;
+	for (elapsed = 0; elapsed < MAILBOX_TIMEOUT_MS;
+		elapsed += MAILBOX_POLL_INTERVAL_MS) {
+		req.ack_trans = 1;
+		rte_write64(req.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		req.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (!req.ack_trans)
+			break;
+	}
+	if (elapsed >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	AFU_MF_PMD_DEBUG("Indirect read 0x%x, value 0x%08x", addr, *value);
+	return 0;
+}
+
+/*
+ * Print the traffic monitor counters, the AVST rx error flags and the
+ * loopback FIFO status to stdout. Reporting stops silently at the first
+ * register that cannot be read.
+ */
+static void he_hssi_report(struct he_hssi_ctx *ctx)
+{
+	uint32_t reg = 0;
+	uint64_t bytes = 0;
+
+	if (he_hssi_indirect_read(ctx, TM_PKT_GOOD, &reg))
+		return;
+	printf("Number of good packets received: %u\n", reg);
+
+	if (he_hssi_indirect_read(ctx, TM_PKT_BAD, &reg))
+		return;
+	printf("Number of bad packets received: %u\n", reg);
+
+	/* byte counter is split, TM_BYTE_CNT1 holds the upper 32 bits */
+	if (he_hssi_indirect_read(ctx, TM_BYTE_CNT1, &reg))
+		return;
+	bytes = (uint64_t)reg << 32;
+	if (he_hssi_indirect_read(ctx, TM_BYTE_CNT0, &reg))
+		return;
+	bytes |= reg;
+	printf("Number of bytes received: %"PRIu64"\n", bytes);
+
+	if (he_hssi_indirect_read(ctx, TM_AVST_RX_ERR, &reg))
+		return;
+	if (reg & ERR_VALID) {
+		printf("AVST rx error:");
+		if (reg & OVERFLOW_ERR)
+			printf(" overflow");
+		if (reg & LENGTH_ERR)
+			printf(" length");
+		if (reg & OVERSIZE_ERR)
+			printf(" oversize");
+		if (reg & UNDERSIZE_ERR)
+			printf(" undersize");
+		if (reg & MAC_CRC_ERR)
+			printf(" crc");
+		if (reg & PHY_ERR)
+			printf(" phy");
+		printf("\n");
+	}
+
+	if (he_hssi_indirect_read(ctx, LOOPBACK_FIFO_STATUS, &reg))
+		return;
+	if (reg & (ALMOST_EMPTY | ALMOST_FULL)) {
+		printf("FIFO status:");
+		if (reg & ALMOST_EMPTY)
+			printf(" almost empty");
+		if (reg & ALMOST_FULL)
+			printf(" almost full");
+		printf("\n");
+	}
+}
+
+/*
+ * Run the HE-HSSI test with the configuration stored by he_hssi_config().
+ *
+ * Any running transfer is stopped and the configured port is selected.
+ * When he_loopback is not negative, only the loopback enable is written and
+ * the function returns. Otherwise the traffic generator is programmed and
+ * started, TG_PKT_XFRD is polled once per second for at most cfg->timeout
+ * seconds, and the monitor counters are printed.
+ *
+ * Returns 0 on success (also when the packet count was not reached within
+ * the timeout), -EINVAL if dev is NULL, -ENOENT if the device has no
+ * private data, or the error of the failing register access.
+ */
+static int he_hssi_test(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
+	struct he_hssi_ctx *ctx = NULL;
+	struct traffic_ctrl_ch_sel sel;
+	uint32_t val = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_hssi_cfg;
+	ctx = &priv->he_hssi_ctx;
+
+	ret = he_hssi_indirect_write(ctx, TG_STOP_XFR, 0);
+	if (ret)
+		return ret;
+
+	/* zero the register image so the reserved bits are not garbage */
+	sel.csr = 0;
+	sel.channel_sel = cfg->port;
+	rte_write64(sel.csr, ctx->addr + TRAFFIC_CTRL_CH_SEL);
+
+	if (cfg->he_loopback >= 0) {
+		val = cfg->he_loopback ? 1 : 0;
+		AFU_MF_PMD_INFO("%s HE loopback on port %u",
+			val ? "Enable" : "Disable", cfg->port);
+		return he_hssi_indirect_write(ctx, LOOPBACK_EN, val);
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_NUM_PKT, cfg->num_packets);
+	if (ret)
+		return ret;
+
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN, cfg->packet_length);
+	if (ret)
+		return ret;
+
+	/* MAC addresses are split into low 32 bits and high 16 bits */
+	val = cfg->src_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->src_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->dest_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->dest_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_length ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN_TYPE, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_payload ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_DATA_PATTERN, val);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < 3; i++) {
+		ret = he_hssi_indirect_write(ctx, TG_RANDOM_SEED(i),
+			cfg->rnd_seed[i]);
+		if (ret)
+			return ret;
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_START_XFR, 1);
+	if (ret)
+		return ret;
+
+	/*
+	 * The counter is restarted here: it still holds 3 from the seed loop,
+	 * which would shorten the wait and skip it for timeouts up to 3.
+	 */
+	for (i = 0; i < cfg->timeout; i++) {
+		ret = he_hssi_indirect_read(ctx, TG_PKT_XFRD, &val);
+		if (ret)
+			break;
+		if (val == cfg->num_packets)
+			break;
+		sleep(1);
+	}
+
+	he_hssi_report(ctx);
+
+	return ret;
+}
+
+/*
+ * Allocate the zeroed HE-HSSI private data if the device has none yet and
+ * record the register base address of the device in its context.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOMEM if the allocation
+ * fails.
+ */
+static int he_hssi_init(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *hssi;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	hssi = (struct he_hssi_priv *)dev->priv;
+	if (hssi == NULL) {
+		hssi = rte_zmalloc(NULL, sizeof(struct he_hssi_priv), 0);
+		if (hssi == NULL)
+			return -ENOMEM;
+		dev->priv = hssi;
+	}
+
+	hssi->he_hssi_ctx.addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+/*
+ * Validate a struct rte_pmd_afu_he_hssi_cfg supplied by the caller and copy
+ * it into the private data of the device.
+ *
+ * Returns 0 on success, -ENOENT if the device has no private data, and
+ * -EINVAL for a NULL argument, a config_size that does not match the
+ * structure, or a port not below NUM_HE_HSSI_PORTS.
+ */
+static int he_hssi_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_hssi_priv *hssi;
+	const struct rte_pmd_afu_he_hssi_cfg *req;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	hssi = (struct he_hssi_priv *)dev->priv;
+	if (hssi == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_hssi_cfg))
+		return -EINVAL;
+
+	req = (const struct rte_pmd_afu_he_hssi_cfg *)config;
+	if (req->port >= NUM_HE_HSSI_PORTS)
+		return -EINVAL;
+
+	rte_memcpy(&hssi->he_hssi_cfg, req, sizeof(hssi->he_hssi_cfg));
+
+	return 0;
+}
+
+/*
+ * Free the private data of the device and clear the pointer.
+ * Returns 0, or -EINVAL if dev is NULL.
+ */
+static int he_hssi_close(struct afu_mf_rawdev *dev)
+{
+	int ret = -EINVAL;
+
+	if (dev != NULL) {
+		rte_free(dev->priv);
+		dev->priv = NULL;
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Print the register base address of the device to 'f', or to stdout when
+ * 'f' is NULL.
+ * Returns 0, -EINVAL if dev is NULL, -ENOENT if it has no private data.
+ */
+static int he_hssi_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_hssi_priv *hssi;
+	FILE *out;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	hssi = (struct he_hssi_priv *)dev->priv;
+	if (hssi == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+
+	fprintf(out, "addr:\t\t%p\n", (void *)hssi->he_hssi_ctx.addr);
+
+	return 0;
+}
+
+/* HE-HSSI operations; start, stop and reset are not provided */
+static struct afu_mf_ops he_hssi_ops = {
+	.init = he_hssi_init,
+	.config = he_hssi_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_hssi_test,
+	.close = he_hssi_close,
+	.dump = he_hssi_dump,
+	.reset = NULL
+};
+
+/* Driver entry binding the HE-HSSI UUID to he_hssi_ops */
+struct afu_mf_drv he_hssi_drv = {
+	.uuid = { HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+	.ops = &he_hssi_ops
+};
diff --git a/drivers/raw/afu_mf/he_hssi.h b/drivers/raw/afu_mf/he_hssi.h
new file mode 100644
index 0000000..f8b9623
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_HSSI_H_
+#define _HE_HSSI_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define HE_HSSI_UUID_L    0xbb370242ac130002
+#define HE_HSSI_UUID_H    0x823c334c98bf11ea
+#define NUM_HE_HSSI_PORTS 8
+
+extern struct afu_mf_drv he_hssi_drv;
+
+/* HE-HSSI registers definition */
+#define TRAFFIC_CTRL_CMD    0x30
+#define TRAFFIC_CTRL_DATA   0x38
+#define TRAFFIC_CTRL_CH_SEL 0x40
+#define AFU_SCRATCHPAD      0x48
+
+#define TG_NUM_PKT        0x3c00
+#define TG_PKT_LEN_TYPE   0x3c01
+#define TG_DATA_PATTERN   0x3c02
+#define TG_START_XFR      0x3c03
+#define TG_STOP_XFR       0x3c04
+#define TG_SRC_MAC_L      0x3c05
+#define TG_SRC_MAC_H      0x3c06
+#define TG_DST_MAC_L      0x3c07
+#define TG_DST_MAC_H      0x3c08
+#define TG_PKT_XFRD       0x3c09
+#define TG_RANDOM_SEED(n) (0x3c0a + (n))
+#define TG_PKT_LEN        0x3c0d
+
+#define TM_NUM_PKT        0x3d00
+#define TM_PKT_GOOD       0x3d01
+#define TM_PKT_BAD        0x3d02
+#define TM_BYTE_CNT0      0x3d03
+#define TM_BYTE_CNT1      0x3d04
+#define TM_AVST_RX_ERR    0x3d07
+#define   OVERFLOW_ERR    (1 << 9)
+#define   LENGTH_ERR      (1 << 8)
+#define   OVERSIZE_ERR    (1 << 7)
+#define   UNDERSIZE_ERR   (1 << 6)
+#define   MAC_CRC_ERR     (1 << 5)
+#define   PHY_ERR         (1 << 4)
+#define   ERR_VALID       (1 << 3)
+
+#define LOOPBACK_EN          0x3e00
+#define LOOPBACK_FIFO_STATUS 0x3e01
+#define   ALMOST_EMPTY    (1 << 1)
+#define   ALMOST_FULL     (1 << 0)
+
+#define MAILBOX_TIMEOUT_MS       100
+#define MAILBOX_POLL_INTERVAL_MS 10
+
+/*
+ * Image of the TRAFFIC_CTRL_CMD register, accessed as a whole through 'csr'.
+ * he_hssi.c sets read_cmd or write_cmd together with afu_cmd_addr to issue
+ * an indirect access, polls ack_trans until it is set, then writes
+ * ack_trans back until it reads as 0.
+ */
+struct traffic_ctrl_cmd {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t read_cmd:1;
+			uint32_t write_cmd:1;
+			uint32_t ack_trans:1;
+			uint32_t rsvd1:29;
+			uint32_t afu_cmd_addr:16;  /* address of the target register */
+			uint32_t rsvd2:16;
+		};
+	};
+};
+
+/*
+ * Image of the TRAFFIC_CTRL_DATA register: the first 32-bit word returns
+ * the result of an indirect read, the second one carries the value of an
+ * indirect write.
+ */
+struct traffic_ctrl_data {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t read_data;
+			uint32_t write_data;
+		};
+	};
+};
+
+/*
+ * Image of the TRAFFIC_CTRL_CH_SEL register; he_hssi_test() loads
+ * channel_sel with the configured port.
+ */
+struct traffic_ctrl_ch_sel {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t channel_sel:3;
+			uint32_t rsvd1:29;
+			uint32_t rsvd2;
+		};
+	};
+};
+
+/* Runtime context of HE-HSSI */
+struct he_hssi_ctx {
+	uint8_t *addr;  /* register base, set from afu_mf_rawdev.addr at init */
+};
+
+/* Private data of an HE-HSSI device, stored in afu_mf_rawdev.priv */
+struct he_hssi_priv {
+	struct rte_pmd_afu_he_hssi_cfg he_hssi_cfg;  /* copy made by he_hssi_config() */
+	struct he_hssi_ctx he_hssi_ctx;
+};
+
+#endif /* _HE_HSSI_H_ */
diff --git a/drivers/raw/afu_mf/he_lbk.c b/drivers/raw/afu_mf/he_lbk.c
new file mode 100644
index 0000000..9e99b80
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.c
@@ -0,0 +1,430 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_lbk.h"
+
+/*
+ * Translate the stored HE-LBK configuration into a CSR_CFG value and write
+ * it to the device.
+ *
+ * multicl_len is encoded as 2 for a multi_cl of 4 and as multi_cl - 1
+ * otherwise.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the device has
+ * no private data.
+ */
+static int he_lbk_afu_config(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *lbk;
+	const struct rte_pmd_afu_he_lbk_cfg *cfg;
+	struct he_lbk_csr_cfg reg;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	lbk = (struct he_lbk_priv *)dev->priv;
+	if (lbk == NULL)
+		return -ENOENT;
+
+	cfg = &lbk->he_lbk_cfg;
+
+	reg.csr = 0;
+	reg.cont = cfg->cont ? 1 : 0;
+	reg.mode = cfg->mode;
+	reg.multicl_len = (cfg->multi_cl == 4) ? 2 : cfg->multi_cl - 1;
+	reg.trput_interleave = cfg->trput_interleave;
+
+	AFU_MF_PMD_DEBUG("cfg: 0x%08x", reg.csr);
+	rte_write32(reg.csr, lbk->he_lbk_ctx.addr + CSR_CFG);
+
+	return 0;
+}
+
+/*
+ * Print the result of a loopback run over 'cl' cache lines to stdout.
+ *
+ * Read/write counts come from CSR_STATUS0, pending counts from CSR_STATUS1
+ * and the clock count from the DSM status block; the start overhead (and,
+ * outside continuous mode, the end overhead) is subtracted from it. When
+ * cfg->freq_mhz is 0 it is filled in from the low 16 bits of CSR_HE_INFO0,
+ * or with 350 if that field is 0 as well. Does nothing when dev or its
+ * private data is NULL.
+ */
+static void he_lbk_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+	struct he_lbk_priv *lbk;
+	struct rte_pmd_afu_he_lbk_cfg *cfg;
+	struct he_lbk_ctx *ctx;
+	struct he_lbk_dsm_status *dsm;
+	struct he_lbk_status0 s0;
+	struct he_lbk_status1 s1;
+	uint64_t msg, clocks, he_info;
+	double rd_bw, wr_bw;
+
+	if (dev == NULL || dev->priv == NULL)
+		return;
+
+	lbk = (struct he_lbk_priv *)dev->priv;
+	cfg = &lbk->he_lbk_cfg;
+	ctx = &lbk->he_lbk_ctx;
+	dsm = ctx->status_ptr;
+
+	msg = rte_read64(ctx->addr + CSR_SWTEST_MSG);
+	s0.csr = rte_read64(ctx->addr + CSR_STATUS0);
+	s1.csr = rte_read64(ctx->addr + CSR_STATUS1);
+
+	clocks = cfg->cont ?
+		dsm->num_clocks - dsm->start_overhead :
+		dsm->num_clocks - (dsm->start_overhead + dsm->end_overhead);
+
+	if (cfg->freq_mhz == 0) {
+		he_info = rte_read64(ctx->addr + CSR_HE_INFO0);
+		AFU_MF_PMD_INFO("API version: %"PRIx64, he_info >> 16);
+		cfg->freq_mhz = he_info & 0xffff;
+		if (cfg->freq_mhz == 0) {
+			AFU_MF_PMD_INFO("Frequency of AFU clock is unknown."
+				" Assuming 350 MHz.");
+			cfg->freq_mhz = 350;
+		}
+	}
+
+	/* bytes moved per second: lines * line size * clock rate / clocks */
+	rd_bw = ((double)s0.num_reads * CACHE_LINE_SIZE(1) *
+		MHZ(cfg->freq_mhz)) / clocks;
+	wr_bw = ((double)s0.num_writes * CACHE_LINE_SIZE(1) *
+		MHZ(cfg->freq_mhz)) / clocks;
+
+	printf("Cachelines  Read_Count Write_Count Pend_Read Pend_Write "
+		"Clocks@%uMHz   Rd_Bandwidth   Wr_Bandwidth\n",
+		cfg->freq_mhz);
+	printf("%10u  %10u %10u %10u %10u  %12"PRIu64
+		"   %7.3f GB/s   %7.3f GB/s\n",
+		cl, s0.num_reads, s0.num_writes,
+		s1.num_pend_reads, s1.num_pend_writes,
+		clocks, rd_bw / 1e9, wr_bw / 1e9);
+	printf("Test Message: 0x%"PRIx64"\n", msg);
+}
+
+/*
+ * Run the HE-LBK test with the configuration stored by he_lbk_config().
+ *
+ * The DMA addresses and CSR_CFG are programmed once and the source buffer
+ * is filled with an incrementing 32-bit pattern. Then, for every cache
+ * line count from cfg->begin to cfg->end in steps of cfg->multi_cl, the
+ * device is reset and started, completion is awaited through the DSM status
+ * block, the result is printed, and in NLB_MODE_LPBK the destination
+ * buffer is compared with the pattern (a mismatch is only logged).
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL or no valid configuration
+ * has been stored, -ENOENT if the device has no private data, and
+ * -ETIMEDOUT if the device does not report completion in time.
+ */
+static int he_lbk_test(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	struct he_lbk_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+
+	/*
+	 * multi_cl is 0 only when he_lbk_config() has not stored a
+	 * configuration yet; a step of 0 would make the test loop endless.
+	 */
+	if (cfg->multi_cl == 0)
+		return -EINVAL;
+
+	/* reset is written as 0, then as 1 after a delay */
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	rte_delay_us(1000);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	/* initialize DMA addresses */
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->src_iova),
+		ctx->addr + CSR_SRC_ADDR);
+
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->dest_iova),
+		ctx->addr + CSR_DST_ADDR);
+
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	rte_write32(CACHE_LINE_ALIGNED(ctx->dsm_iova),
+		ctx->addr + CSR_AFU_DSM_BASEL);
+	rte_write32(CACHE_LINE_ALIGNED(ctx->dsm_iova) >> 32,
+		ctx->addr + CSR_AFU_DSM_BASEH);
+
+	ret = he_lbk_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CACHE_LINE_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		memset(ctx->dest_ptr, 0, CACHE_LINE_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		rte_delay_us(1000);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		rte_write32(cl - 1, ctx->addr + CSR_NUM_LINES);
+
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			/* continuous mode runs for the timeout, then is stopped */
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				ret = -ETIMEDOUT;
+				goto end;
+			}
+		} else {
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				ret = -ETIMEDOUT;
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		he_lbk_report(dev, cl);
+
+		/* give pending reads and writes up to 100 ms to drain */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		if (cfg->mode == NLB_MODE_LPBK) {
+			ptr = (uint32_t *)ctx->dest_ptr;
+			j = CACHE_LINE_SIZE(cl) >> 2;
+			for (i = 0; i < j; i++) {
+				if (*ptr++ != i) {
+					AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+					break;
+				}
+			}
+		}
+	}
+
+end:
+	/* report the poll timeout to the caller instead of always returning 0 */
+	return ret;
+}
+
+/*
+ * Free the DSM, source and destination buffers of the device and clear the
+ * pointers referring to them.
+ * Returns 0, -EINVAL if dev is NULL, -ENOENT if it has no private data.
+ */
+static int he_lbk_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_ctx *bufs;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	bufs = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+
+	/* status_ptr refers to the DSM buffer, so it is dropped with it */
+	rte_free(bufs->dsm_ptr);
+	bufs->status_ptr = NULL;
+	bufs->dsm_ptr = NULL;
+
+	rte_free(bufs->src_ptr);
+	bufs->src_ptr = NULL;
+
+	rte_free(bufs->dest_ptr);
+	bufs->dest_ptr = NULL;
+
+	return 0;
+}
+
+/*
+ * Set up the HE-LBK context: record the register base and allocate the DSM,
+ * source and destination buffers together with their IO addresses.
+ *
+ * Buffers left from an earlier initialization are released first. On any
+ * failure everything allocated so far is released again.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the device has
+ * no private data, -ENOMEM if an allocation or address lookup fails.
+ */
+static int he_lbk_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	ctx = &priv->he_lbk_ctx;
+
+	/*
+	 * he_lbk_init() keeps existing private data, so this may run on a
+	 * context that already owns buffers; free them before the pointers
+	 * are overwritten (no-op on a fresh, zeroed context).
+	 */
+	he_lbk_ctx_release(dev);
+
+	ctx->addr = (uint8_t *)dev->addr;
+
+	ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+	if (!ctx->dsm_ptr) {
+		ret = -ENOMEM;
+		goto release;
+	}
+	ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr);
+	if (ctx->dsm_iova == RTE_BAD_IOVA) {
+		ret = -ENOMEM;
+		goto release;
+	}
+
+	ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (!ctx->src_ptr) {
+		ret = -ENOMEM;
+		goto release;
+	}
+	ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr);
+	if (ctx->src_iova == RTE_BAD_IOVA) {
+		ret = -ENOMEM;
+		goto release;
+	}
+
+	ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (!ctx->dest_ptr) {
+		ret = -ENOMEM;
+		goto release;
+	}
+	ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr);
+	if (ctx->dest_iova == RTE_BAD_IOVA) {
+		ret = -ENOMEM;
+		goto release;
+	}
+
+	/* the status block is located at the start of the DSM buffer */
+	ctx->status_ptr = (struct he_lbk_dsm_status *)ctx->dsm_ptr;
+	return 0;
+
+release:
+	he_lbk_ctx_release(dev);
+	return ret;
+}
+
+/*
+ * Allocate the zeroed HE-LBK private data if the device has none yet, then
+ * initialize the context through he_lbk_ctx_init().
+ * Returns its result, -EINVAL if dev is NULL, or -ENOMEM if the private
+ * data cannot be allocated.
+ */
+static int he_lbk_init(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->priv == NULL) {
+		void *mem = rte_zmalloc(NULL, sizeof(struct he_lbk_priv), 0);
+
+		dev->priv = mem;
+		if (mem == NULL)
+			return -ENOMEM;
+	}
+
+	return he_lbk_ctx_init(dev);
+}
+
+/*
+ * Validate a struct rte_pmd_afu_he_lbk_cfg supplied by the caller and copy
+ * it into the private data of the device.
+ *
+ * Accepted are a mode up to NLB_MODE_TRPUT, a multi_cl of 1, 2 or 4, and
+ * MIN_CACHE_LINES <= begin <= end <= MAX_CACHE_LINES.
+ *
+ * Returns 0 on success, -ENOENT if the device has no private data, and
+ * -EINVAL for a NULL argument, a config_size that does not match the
+ * structure, or a value outside the accepted ranges.
+ */
+static int he_lbk_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_lbk_priv *lbk;
+	const struct rte_pmd_afu_he_lbk_cfg *req;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	lbk = (struct he_lbk_priv *)dev->priv;
+	if (lbk == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_lbk_cfg))
+		return -EINVAL;
+
+	req = (const struct rte_pmd_afu_he_lbk_cfg *)config;
+
+	if (req->mode > NLB_MODE_TRPUT)
+		return -EINVAL;
+
+	switch (req->multi_cl) {
+	case 1:
+	case 2:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (req->begin < MIN_CACHE_LINES || req->begin > MAX_CACHE_LINES ||
+		req->end < req->begin || req->end > MAX_CACHE_LINES)
+		return -EINVAL;
+
+	rte_memcpy(&lbk->he_lbk_cfg, req, sizeof(lbk->he_lbk_cfg));
+
+	return 0;
+}
+
+/*
+ * Release the test buffers, free the private data of the device and clear
+ * the pointer. The result of he_lbk_ctx_release() is not checked.
+ * Returns 0, or -EINVAL if dev is NULL.
+ */
+static int he_lbk_close(struct afu_mf_rawdev *dev)
+{
+	int ret = -EINVAL;
+
+	if (dev != NULL) {
+		he_lbk_ctx_release(dev);
+		rte_free(dev->priv);
+		dev->priv = NULL;
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Print the register base and the addresses of the test buffers to 'f', or
+ * to stdout when 'f' is NULL.
+ * Returns 0, -EINVAL if dev is NULL, -ENOENT if it has no private data.
+ */
+static int he_lbk_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	const struct he_lbk_ctx *c;
+	FILE *out;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+	c = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+
+	fprintf(out, "addr:\t\t%p\n", (void *)c->addr);
+	fprintf(out, "dsm_ptr:\t%p\n", (void *)c->dsm_ptr);
+	fprintf(out, "dsm_iova:\t0x%"PRIx64"\n", c->dsm_iova);
+	fprintf(out, "src_ptr:\t%p\n", (void *)c->src_ptr);
+	fprintf(out, "src_iova:\t0x%"PRIx64"\n", c->src_iova);
+	fprintf(out, "dest_ptr:\t%p\n", (void *)c->dest_ptr);
+	fprintf(out, "dest_iova:\t0x%"PRIx64"\n", c->dest_iova);
+	fprintf(out, "status_ptr:\t%p\n", (void *)c->status_ptr);
+
+	return 0;
+}
+
+/*
+ * HE-LBK operations, shared by he_lbk_drv and he_mem_lbk_drv; start, stop
+ * and reset are not provided
+ */
+static struct afu_mf_ops he_lbk_ops = {
+	.init = he_lbk_init,
+	.config = he_lbk_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_lbk_test,
+	.close = he_lbk_close,
+	.dump = he_lbk_dump,
+	.reset = NULL
+};
+
+/* Driver entry binding the HE-LBK UUID to he_lbk_ops */
+struct afu_mf_drv he_lbk_drv = {
+	.uuid = { HE_LBK_UUID_L, HE_LBK_UUID_H },
+	.ops = &he_lbk_ops
+};
+
+/* Driver entry binding the HE-MEM-LBK UUID to the same he_lbk_ops */
+struct afu_mf_drv he_mem_lbk_drv = {
+	.uuid = { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+	.ops = &he_lbk_ops
+};
diff --git a/drivers/raw/afu_mf/he_lbk.h b/drivers/raw/afu_mf/he_lbk.h
new file mode 100644
index 0000000..c2e8a29
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_LBK_H_
+#define _HE_LBK_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define HE_LBK_UUID_L      0xb94b12284c31e02b
+#define HE_LBK_UUID_H      0x56e203e9864f49a7
+#define HE_MEM_LBK_UUID_L  0xbb652a578330a8eb
+#define HE_MEM_LBK_UUID_H  0x8568ab4e6ba54616
+
+extern struct afu_mf_drv he_lbk_drv;
+extern struct afu_mf_drv he_mem_lbk_drv;
+
+/* HE-LBK & HE-MEM-LBK registers definition */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+#define DSM_SIZE           0x200000
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+#define NLB_BUF_SIZE  0x400000
+#define TEST_MEM_ALIGN  1024
+
+/*
+ * Image of the CSR_CTL register, accessed as a whole through 'csr'.
+ * he_lbk_test() writes it with reset 0 and then with reset 1 before a run,
+ * sets start to begin it and force_completion to end it.
+ * NOTE(review): the 0-then-1 sequence suggests reset is active low - confirm
+ * against the hardware specification.
+ */
+struct he_lbk_csr_ctl {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t reset:1;
+			uint32_t start:1;
+			uint32_t force_completion:1;
+			uint32_t reserved:29;
+		};
+	};
+};
+
+/*
+ * Image of the CSR_CFG register, accessed as a whole through 'csr'.
+ * he_lbk_afu_config() fills cont, mode, multicl_len and trput_interleave
+ * from the stored configuration and leaves the other fields 0.
+ */
+struct he_lbk_csr_cfg {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t rsvd1:1;
+			uint32_t cont:1;
+			uint32_t mode:3;
+			uint32_t multicl_len:2;  /* 2 for multi_cl 4, else multi_cl - 1 */
+			uint32_t rsvd2:13;
+			uint32_t trput_interleave:3;
+			uint32_t test_cfg:5;
+			uint32_t interrupt_on_error:1;
+			uint32_t interrupt_testmode:1;
+			uint32_t rsvd3:2;
+		};
+	};
+};
+
+/* Image of the CSR_STATUS0 register: write count, then read count */
+struct he_lbk_status0 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_writes;
+			uint32_t num_reads;
+		};
+	};
+};
+
+/*
+ * Image of the CSR_STATUS1 register: pending write count, then pending read
+ * count. he_lbk_test() waits for the whole register to become 0 after a run.
+ */
+struct he_lbk_status1 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_pend_writes;
+			uint32_t num_pend_reads;
+		};
+	};
+};
+
+/*
+ * Status block at the start of the DSM buffer (he_lbk_ctx.status_ptr is set
+ * to dsm_ptr). The address of that buffer is given to the device through
+ * CSR_AFU_DSM_BASEL/BASEH.
+ */
+struct he_lbk_dsm_status {
+	uint32_t test_complete;  /* bit 0 is polled to detect completion */
+	uint32_t test_error;
+	uint64_t num_clocks;
+	uint32_t num_reads;
+	uint32_t num_writes;
+	uint32_t start_overhead;  /* subtracted from num_clocks when reporting */
+	uint32_t end_overhead;  /* also subtracted unless in continuous mode */
+};
+
+/*
+ * Runtime context of HE-LBK: register base plus the three test buffers,
+ * each with its virtual address and its IO address.
+ */
+struct he_lbk_ctx {
+	uint8_t *addr;  /* register base, set from afu_mf_rawdev.addr at init */
+	uint8_t *dsm_ptr;  /* DSM buffer of DSM_SIZE bytes */
+	uint64_t dsm_iova;
+	uint8_t *src_ptr;  /* source buffer of NLB_BUF_SIZE bytes */
+	uint64_t src_iova;
+	uint8_t *dest_ptr;  /* destination buffer of NLB_BUF_SIZE bytes */
+	uint64_t dest_iova;
+	struct he_lbk_dsm_status *status_ptr;  /* same address as dsm_ptr */
+};
+
+/* Private data of an HE-LBK device, stored in afu_mf_rawdev.priv */
+struct he_lbk_priv {
+	struct rte_pmd_afu_he_lbk_cfg he_lbk_cfg;  /* copy made by he_lbk_config() */
+	struct he_lbk_ctx he_lbk_ctx;
+};
+
+#endif /* _HE_LBK_H_ */
diff --git a/drivers/raw/afu_mf/he_mem.c b/drivers/raw/afu_mf/he_mem.c
new file mode 100644
index 0000000..ccbb3a8
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_mem.h"
+
+/*
+ * Run the memory traffic generator test.
+ *
+ * The scratchpad register is checked with a write/read-back first. The
+ * configured channel mask is then limited to the bits read from
+ * MEM_TG_CTRL and written back to start the generators. MEM_TG_STAT is
+ * polled every MEM_TG_POLL_INTERVAL_MS until each started channel is no
+ * longer active; its result is printed to stdout.
+ *
+ * Returns 0 when all started channels finished (whatever their result),
+ * -EINVAL if dev is NULL, -ENOENT if the device has no private data, -EIO
+ * if the scratchpad check fails, and -ETIMEDOUT if a channel is still
+ * active after MEM_TG_TIMEOUT_MS.
+ */
+static int he_mem_tg_test(struct afu_mf_rawdev *dev)
+{
+	struct he_mem_tg_priv *priv = NULL;
+	struct rte_pmd_afu_he_mem_tg_cfg *cfg = NULL;
+	struct he_mem_tg_ctx *ctx = NULL;
+	uint64_t value = 0x12345678;
+	uint64_t cap = 0;
+	uint64_t channel_mask = 0;
+	int i, t = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_mem_tg_cfg;
+	ctx = &priv->he_mem_tg_ctx;
+
+	AFU_MF_PMD_DEBUG("Channel mask: 0x%x", cfg->channel_mask);
+
+	rte_write64(value, ctx->addr + MEM_TG_SCRATCHPAD);
+	cap = rte_read64(ctx->addr + MEM_TG_SCRATCHPAD);
+	AFU_MF_PMD_DEBUG("Scratchpad value: 0x%"PRIx64, cap);
+	if (cap != value) {
+		AFU_MF_PMD_ERR("Test scratchpad register failed");
+		return -EIO;
+	}
+
+	cap = rte_read64(ctx->addr + MEM_TG_CTRL);
+	AFU_MF_PMD_DEBUG("Capability: 0x%"PRIx64, cap);
+
+	channel_mask = cfg->channel_mask & cap;
+	/* start traffic generators */
+	rte_write64(channel_mask, ctx->addr + MEM_TG_CTRL);
+
+	/* check test status */
+	while (t < MEM_TG_TIMEOUT_MS) {
+		value = rte_read64(ctx->addr + MEM_TG_STAT);
+		for (i = 0; i < NUM_MEM_TG_CHANNELS; i++) {
+			if (channel_mask & (1 << i)) {
+				if (TGACTIVE(value, i))
+					continue;
+				printf("TG channel %d test %s\n", i,
+					TGPASS(value, i) ? "pass" :
+					TGTIMEOUT(value, i) ? "timeout" :
+					TGFAIL(value, i) ? "fail" : "error");
+				/* finished channels are not checked again */
+				channel_mask &= ~(1 << i);
+			}
+		}
+		if (!channel_mask)
+			break;
+		rte_delay_ms(MEM_TG_POLL_INTERVAL_MS);
+		t += MEM_TG_POLL_INTERVAL_MS;
+	}
+
+	if (channel_mask) {
+		AFU_MF_PMD_ERR("Timeout 0x%04lx", (unsigned long)value);
+		/* negative errno like the other error paths, not the raw mask */
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate the zeroed MEM-TG private data if the device has none yet and
+ * record the register base address of the device in its context.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOMEM if the allocation
+ * fails.
+ */
+static int he_mem_tg_init(struct afu_mf_rawdev *dev)
+{
+	struct he_mem_tg_priv *tg;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	tg = (struct he_mem_tg_priv *)dev->priv;
+	if (tg == NULL) {
+		tg = rte_zmalloc(NULL, sizeof(struct he_mem_tg_priv), 0);
+		if (tg == NULL)
+			return -ENOMEM;
+		dev->priv = tg;
+	}
+
+	tg->he_mem_tg_ctx.addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+/*
+ * Copy a struct rte_pmd_afu_he_mem_tg_cfg supplied by the caller into the
+ * private data of the device. The content is not validated.
+ *
+ * Returns 0 on success, -ENOENT if the device has no private data, and
+ * -EINVAL for a NULL argument or a config_size that does not match the
+ * structure.
+ */
+static int he_mem_tg_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_mem_tg_priv *tg;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	tg = (struct he_mem_tg_priv *)dev->priv;
+	if (tg == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_mem_tg_cfg))
+		return -EINVAL;
+
+	rte_memcpy(&tg->he_mem_tg_cfg, config, sizeof(tg->he_mem_tg_cfg));
+
+	return 0;
+}
+
+/*
+ * Free the private data of the device and clear the pointer.
+ * Returns 0, or -EINVAL if dev is NULL.
+ */
+static int he_mem_tg_close(struct afu_mf_rawdev *dev)
+{
+	int ret = -EINVAL;
+
+	if (dev != NULL) {
+		rte_free(dev->priv);
+		dev->priv = NULL;
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * Print the register base address of the device to 'f', or to stdout when
+ * 'f' is NULL.
+ * Returns 0, -EINVAL if dev is NULL, -ENOENT if it has no private data.
+ */
+static int he_mem_tg_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_mem_tg_priv *tg;
+	FILE *out;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	tg = (struct he_mem_tg_priv *)dev->priv;
+	if (tg == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+
+	fprintf(out, "addr:\t\t%p\n", (void *)tg->he_mem_tg_ctx.addr);
+
+	return 0;
+}
+
+/* MEM-TG operations; start, stop and reset are not provided */
+static struct afu_mf_ops he_mem_tg_ops = {
+	.init = he_mem_tg_init,
+	.config = he_mem_tg_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_mem_tg_test,
+	.close = he_mem_tg_close,
+	.dump = he_mem_tg_dump,
+	.reset = NULL
+};
+
+/* Driver entry binding the MEM-TG UUID to he_mem_tg_ops */
+struct afu_mf_drv he_mem_tg_drv = {
+	.uuid = { HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+	.ops = &he_mem_tg_ops
+};
diff --git a/drivers/raw/afu_mf/he_mem.h b/drivers/raw/afu_mf/he_mem.h
new file mode 100644
index 0000000..82404b6
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_MEM_H_
+#define _HE_MEM_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
+#define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
+
+#define NUM_MEM_TG_CHANNELS      4
+#define MEM_TG_TIMEOUT_MS     5000
+#define MEM_TG_POLL_INTERVAL_MS 10
+
+extern struct afu_mf_drv he_mem_tg_drv;
+
+/* MEM-TG registers definition */
+#define MEM_TG_SCRATCHPAD   0x28
+#define MEM_TG_CTRL         0x30
+#define   TGCONTROL(n)      (1 << (n))
+#define MEM_TG_STAT         0x38
+/*
+ * Channel n owns the 4 status bits starting at bit (n * 4) of MEM_TG_STAT:
+ * bit 0 active, bit 1 timeout, bit 2 fail, bit 3 pass. 'n' is parenthesized
+ * so that an expression argument is shifted as a whole.
+ */
+#define   TGSTATUS(v, n)    (((v) >> ((n) << 2)) & 0xf)
+#define   TGPASS(v, n)      (((v) >> (((n) << 2) + 3)) & 0x1)
+#define   TGFAIL(v, n)      (((v) >> (((n) << 2) + 2)) & 0x1)
+#define   TGTIMEOUT(v, n)   (((v) >> (((n) << 2) + 1)) & 0x1)
+#define   TGACTIVE(v, n)    (((v) >> ((n) << 2)) & 0x1)
+
+/* Runtime context of MEM-TG */
+struct he_mem_tg_ctx {
+	uint8_t *addr;  /* register base, set from afu_mf_rawdev.addr at init */
+};
+
+/* Private data of a MEM-TG device, stored in afu_mf_rawdev.priv */
+struct he_mem_tg_priv {
+	struct rte_pmd_afu_he_mem_tg_cfg he_mem_tg_cfg;  /* copy made by he_mem_tg_config() */
+	struct he_mem_tg_ctx he_mem_tg_ctx;
+};
+
+#endif /* _HE_MEM_H_ */
diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
new file mode 100644
index 0000000..f304bc8
--- /dev/null
+++ b/drivers/raw/afu_mf/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2022 Intel Corporation
+
+deps += ['rawdev', 'bus_pci', 'bus_ifpga']
+sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c', 'he_mem.c',
+	'he_hssi.c')
+
+headers = files('rte_pmd_afu.h')
diff --git a/drivers/raw/afu_mf/n3000_afu.c b/drivers/raw/afu_mf/n3000_afu.c
new file mode 100644
index 0000000..9b48125
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.c
@@ -0,0 +1,2007 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+
+/*
+ * Translate the NLB test configuration stored in the device private data
+ * into the CSR_CFG register layout and write it to the AFU.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if dev has no
+ * private data.
+ */
+static int nlb_afu_config(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_csr_cfg v;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->nlb_cfg;
+
+	v.csr = 0;
+
+	if (cfg->cont)
+		v.cont = 1;
+
+	/*
+	 * NLB_WRPUSH_I has its own bit; any other policy is programmed into
+	 * wrthru_en. This must stay the only writer of wrthru_en, otherwise
+	 * the NLB_WRPUSH_I case would be overridden.
+	 */
+	if (cfg->cache_policy == NLB_WRPUSH_I)
+		v.wrpush_i = 1;
+	else
+		v.wrthru_en = cfg->cache_policy;
+
+	/* NLB_RDLINE_MIXED is encoded as 3; other hints are used verbatim */
+	if (cfg->cache_hint == NLB_RDLINE_MIXED)
+		v.rdsel = 3;
+	else
+		v.rdsel = cfg->cache_hint;
+
+	v.mode = cfg->mode;
+	v.chsel = cfg->read_vc;
+	v.wr_chsel = cfg->write_vc;
+	v.wrfence_chsel = cfg->wrfence_vc;
+	/* the register field holds the multi-cacheline count minus one */
+	v.multicl_len = cfg->multi_cl - 1;
+
+	AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr);
+	rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
+
+	return 0;
+}
+
+/*
+ * Print the counters the AFU left in the DSM status area for a run of
+ * cl cache lines, together with the derived read/write bandwidth.
+ * A zero cfg->freq_mhz is replaced by 200 before the computation.
+ */
+static void nlb_afu_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+	struct n3000_afu_priv *priv;
+	struct rte_pmd_afu_nlb_cfg *cfg;
+	struct nlb_dsm_status *stat;
+	uint64_t ticks;
+	double rd_bw, wr_bw;
+
+	if (dev == NULL || dev->priv == NULL)
+		return;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	stat = priv->nlb_ctx.status_ptr;
+	cfg = &priv->nlb_cfg;
+
+	/* the end overhead is only subtracted for non-continuous runs */
+	if (!cfg->cont)
+		ticks = stat->num_clocks -
+			(stat->start_overhead + stat->end_overhead);
+	else
+		ticks = stat->num_clocks - stat->start_overhead;
+
+	if (!cfg->freq_mhz)
+		cfg->freq_mhz = 200;
+
+	rd_bw = ((double)stat->num_reads * CACHE_LINE_SIZE(1) *
+		MHZ(cfg->freq_mhz)) / ticks;
+	wr_bw = ((double)stat->num_writes * CACHE_LINE_SIZE(1) *
+		MHZ(cfg->freq_mhz)) / ticks;
+
+	printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
+		"Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
+	printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n",
+		cl, stat->num_reads, stat->num_writes, ticks,
+		rd_bw / 1e9, wr_bw / 1e9);
+}
+
+/*
+ * Run the NLB test once for every cache line count from cfg->begin to
+ * cfg->end (step cfg->multi_cl): program the AFU, start it, wait for the
+ * completion flag in the DSM area, print a report and compare the
+ * destination buffer with the generated source pattern.
+ *
+ * Returns 0 on success, -EINVAL/-ENOENT on a missing device or private
+ * data, or the error returned by nlb_afu_config()/dsm_poll_timeout().
+ *
+ * NOTE(review): a data mismatch is only logged; it does not change the
+ * return value.
+ */
+static int nlb_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct nlb_afu_ctx *ctx = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	ctx = &priv->nlb_ctx;
+	cfg = &priv->nlb_cfg;
+
+	/* initialize registers */
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
+
+	/* write CSR_CTL as zero first, then with the reset field set */
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->src_iova),
+		ctx->addr + CSR_SRC_ADDR);
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(CACHE_LINE_ALIGNED(ctx->dest_iova),
+		ctx->addr + CSR_DST_ADDR);
+
+	ret = nlb_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data */
+	/* each 32-bit word holds its own index; covers cfg->end cache lines */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CACHE_LINE_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		/* clear the destination and the DSM status before every run */
+		memset(ctx->dest_ptr, 0, CACHE_LINE_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		rte_write32(cl, ctx->addr + CSR_NUM_LINES);
+
+		rte_delay_us(10);
+
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			/*
+			 * continuous mode: let the test run for the configured
+			 * time (cfg->timeout is presumably in seconds), then
+			 * force completion and wait for the completion flag
+			 */
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+		} else {
+			/* wait for the completion flag first, then force completion */
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		nlb_afu_report(dev, cl);
+
+		/*
+		 * wait for CSR_STATUS1 to read zero, at most 100 polls 1 ms
+		 * apart. NOTE(review): expiry of this wait is not reported.
+		 */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		/* the destination must contain the same index pattern as the source */
+		ptr = (uint32_t *)ctx->dest_ptr;
+		j = CACHE_LINE_SIZE(cl) >> 2;
+		for (i = 0; i < j; i++) {
+			if (*ptr++ != i) {
+				AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+				break;
+			}
+		}
+	}
+
+end:
+	return ret;
+}
+
+/*
+ * Release every buffer owned by the DMA test context and reset the
+ * pointers to NULL. A NULL ctx is ignored.
+ */
+static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
+{
+	int idx;
+
+	if (ctx == NULL)
+		return;
+
+	/* verification buffers */
+	rte_free(ctx->ref_buf);
+	ctx->ref_buf = NULL;
+	rte_free(ctx->data_buf);
+	ctx->data_buf = NULL;
+
+	/* bounce buffers */
+	for (idx = NUM_DMA_BUF - 1; idx >= 0; idx--) {
+		rte_free(ctx->dma_buf[idx]);
+		ctx->dma_buf[idx] = NULL;
+	}
+}
+
+/*
+ * Allocate the NUM_DMA_BUF zeroed bounce buffers (cfg->size bytes each,
+ * with their IOVA) and the page-aligned data/reference buffers
+ * (cfg->length bytes each).
+ *
+ * Returns 0 on success, -EINVAL on a NULL argument, -ENOMEM on any
+ * allocation or address translation failure; on failure everything is
+ * released through dma_afu_buf_free().
+ */
+static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
+	struct rte_pmd_afu_dma_cfg *cfg)
+{
+	size_t page_sz = sysconf(_SC_PAGE_SIZE);
+	int n;
+
+	if (ctx == NULL || cfg == NULL)
+		return -EINVAL;
+
+	for (n = 0; n < NUM_DMA_BUF; n++) {
+		ctx->dma_buf[n] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
+			TEST_MEM_ALIGN);
+		if (ctx->dma_buf[n] == NULL)
+			goto nomem;
+
+		ctx->dma_iova[n] = rte_malloc_virt2iova(ctx->dma_buf[n]);
+		if (ctx->dma_iova[n] == RTE_BAD_IOVA)
+			goto nomem;
+	}
+
+	ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz);
+	if (ctx->data_buf == NULL)
+		goto nomem;
+
+	ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz);
+	if (ctx->ref_buf == NULL)
+		goto nomem;
+
+	return 0;
+
+nomem:
+	dma_afu_buf_free(ctx);
+	return -ENOMEM;
+}
+
+/*
+ * Fill the reference buffer with size bytes of test data and copy it to
+ * the data buffer. A non-zero ctx->pattern is used as the fill byte;
+ * otherwise rand() values seeded with 99 are written word by word.
+ */
+static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
+{
+	int *word;
+	size_t remaining;
+
+	if (ctx == NULL || size == 0)
+		return;
+
+	word = (int *)ctx->ref_buf;
+
+	if (!ctx->pattern) {
+		/* fixed seed keeps the pseudo-random data reproducible */
+		srand(99);
+		for (remaining = size >> 2; remaining > 0; remaining--)
+			*word++ = rand();
+	} else {
+		memset(word, ctx->pattern, size);
+	}
+
+	rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
+}
+
+/*
+ * Compare the first size bytes of the data buffer with the reference
+ * buffer. In verbose mode the differing bytes are listed until
+ * ERR_CHECK_LIMIT mismatches have been counted.
+ *
+ * Returns 0 if the buffers match, -1 if they differ, -EINVAL on a NULL
+ * ctx or zero size.
+ */
+static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
+{
+	const uint8_t *expected;
+	const uint8_t *actual;
+	size_t pos;
+	int errors = 0;
+
+	if (ctx == NULL || size == 0)
+		return -EINVAL;
+
+	expected = (uint8_t *)ctx->ref_buf;
+	actual = (uint8_t *)ctx->data_buf;
+
+	if (memcmp(expected, actual, size) == 0) {
+		printf("Transfer is verified\n");
+		return 0;
+	}
+
+	printf("Transfer is corrupted\n");
+	if (!ctx->verbose)
+		return -1;
+
+	for (pos = 0; pos < size; pos++) {
+		if (expected[pos] == actual[pos])
+			continue;
+		if (++errors >= ERR_CHECK_LIMIT)
+			break;
+		printf("Mismatch at 0x%zx, "
+			"Expected %02x  Actual %02x\n",
+			pos, expected[pos], actual[pos]);
+	}
+
+	if (errors >= ERR_CHECK_LIMIT) {
+		printf("......\n");
+		printf("Found more than %d error bytes\n", errors);
+	} else {
+		printf("Found %d error bytes\n", errors);
+	}
+
+	return -1;
+}
+
+/*
+ * Copy bytes from host memory to device registers with 64-bit writes.
+ * Nothing is written unless both dev_addr and bytes pass
+ * IS_ALIGNED_QWORD().
+ */
+static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+	uint64_t total = bytes / sizeof(uint64_t);
+	uint64_t n;
+
+	if (!(IS_ALIGNED_QWORD((uint64_t)dev_addr) &&
+		IS_ALIGNED_QWORD((uint64_t)bytes)))
+		return;
+
+	for (n = 0; n < total; n++)
+		rte_write64(host_addr[n], &dev_addr[n]);
+}
+
+/*
+ * Copy bytes from device registers to host memory with 64-bit reads.
+ * Nothing is read unless both dev_addr and bytes pass
+ * IS_ALIGNED_QWORD().
+ */
+static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+	uint64_t total = bytes / sizeof(uint64_t);
+	uint64_t n;
+
+	if (!(IS_ALIGNED_QWORD((uint64_t)dev_addr) &&
+		IS_ALIGNED_QWORD((uint64_t)bytes)))
+		return;
+
+	for (n = 0; n < total; n++)
+		host_addr[n] = rte_read64(&dev_addr[n]);
+}
+
+/*
+ * Select the ASE window page that contains addr. The control register is
+ * only written when the page differs from the cached current page.
+ */
+static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
+{
+	uint64_t page;
+
+	if (ctx == NULL)
+		return;
+
+	page = addr & ~DMA_ASE_WINDOW_MASK;
+	if (page == ctx->cur_ase_page)
+		return;
+
+	rte_write64(page, ctx->ase_ctrl_addr);
+	ctx->cur_ase_page = page;
+}
+
+/*
+ * Write fewer than QWORD_BYTES bytes to an arbitrary device address by a
+ * read-modify-write of the enclosing 64-bit word in the ASE window.
+ * The caller must make sure the bytes do not cross that word.
+ *
+ * Returns 0 on success (also for count == 0), -EINVAL on a NULL ctx or
+ * count >= QWORD_BYTES.
+ */
+static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+	uint64_t host_addr, uint32_t count)
+{
+	uintptr_t host = (uintptr_t)host_addr;  /* transfer to pointer size */
+	uint64_t byte_off;
+	uint64_t win_off;
+	uint64_t qword;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr,
+		dev_addr, count);
+
+	if (ctx == NULL || count >= QWORD_BYTES)
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	switch_ase_page(ctx, dev_addr);
+
+	byte_off = dev_addr % QWORD_BYTES;
+	win_off = (dev_addr - byte_off) & DMA_ASE_WINDOW_MASK;
+
+	/* merge the new bytes into the current device word and write it back */
+	qword = rte_read64(ctx->ase_data_addr + win_off);
+	rte_memcpy((char *)&qword + byte_off, (void *)host, count);
+	rte_write64(qword, ctx->ase_data_addr + win_off);
+
+	return 0;
+}
+
+/*
+ * Copy the DWORD/QWORD-aligned part of a host buffer to FPGA memory
+ * through the ASE window: an optional leading DWORD to reach QWORD
+ * alignment, then 64-bit blocks split at window boundaries, then an
+ * optional trailing DWORD.
+ *
+ * On success *dst_ptr and *src_ptr are advanced past the bytes written
+ * and *count holds the number of bytes (fewer than DWORD_BYTES) left
+ * for the caller.
+ *
+ * Returns 0 on success, -EINVAL on a NULL ctx or a destination that is
+ * not DWORD aligned.
+ */
+static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+	uint64_t *src_ptr, uint64_t *count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t align_bytes = *count;
+	uint64_t offset = 0;
+	uint64_t left_in_page = 0;
+	uint64_t size_to_copy = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		align_bytes);
+
+	if (!ctx || !IS_ALIGNED_DWORD(dst))
+		return -EINVAL;
+
+	if (align_bytes < DWORD_BYTES)
+		return 0;
+
+	if (!IS_ALIGNED_QWORD(dst)) {
+		/* Write out a single DWORD to get QWORD aligned */
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+
+		rte_write32(*(uint32_t *)(uintptr_t)src,
+			ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/*
+	 * No early return when nothing is left: the cursors below must
+	 * still be handed back so the caller does not repeat the DWORD.
+	 */
+
+	/* Write out blocks of 64-bit values */
+	while (align_bytes >= QWORD_BYTES) {
+		/* room left in the window that contains dst, per iteration */
+		left_in_page = DMA_ASE_WINDOW;
+		left_in_page -= dst & DMA_ASE_WINDOW_MASK;
+		size_to_copy =
+			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+		if (size_to_copy < QWORD_BYTES)
+			break;
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+		blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
+			(uint64_t *)(uintptr_t)src, size_to_copy);
+		src += size_to_copy;
+		dst += size_to_copy;
+		align_bytes -= size_to_copy;
+	}
+
+	if (align_bytes >= DWORD_BYTES) {
+		/* Write out remaining DWORD */
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+		rte_write32(*(uint32_t *)(uintptr_t)src,
+			ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	*src_ptr = src;
+	*dst_ptr = dst;
+	*count = align_bytes;
+
+	return 0;
+}
+
+/*
+ * Copy count bytes from host memory to FPGA memory through the ASE
+ * window: unaligned head bytes, then the aligned bulk through
+ * ase_write(), then the unaligned tail bytes.
+ *
+ * On success *dst_ptr and *src_ptr are advanced past the copied data.
+ * Returns 0 on success or the error of the failing helper.
+ */
+static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+	uint64_t *src_ptr, uint64_t count)
+{
+	uint64_t to = *dst_ptr;
+	uint64_t from = *src_ptr;
+	uint64_t remaining = count;
+	uint64_t part;
+	int rc;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", from, to,
+		count);
+
+	/* aligns address to 8 byte using dst masking method */
+	if (!(IS_ALIGNED_DWORD(to) || IS_ALIGNED_QWORD(to))) {
+		part = QWORD_BYTES - (to % QWORD_BYTES);
+		if (part > remaining)
+			part = remaining;
+		rc = ase_write_unaligned(ctx, to, from, part);
+		if (rc != 0)
+			return rc;
+		remaining -= part;
+		from += part;
+		to += part;
+	}
+
+	/* Handles 8/4 byte MMIO transfer */
+	rc = ase_write(ctx, &to, &from, &remaining);
+	if (rc != 0)
+		return rc;
+
+	/* Left over unaligned bytes transferred using dst masking method */
+	part = QWORD_BYTES - (to % QWORD_BYTES);
+	if (part > remaining)
+		part = remaining;
+
+	rc = ase_write_unaligned(ctx, to, from, part);
+	if (rc != 0)
+		return rc;
+
+	*dst_ptr = to + part;
+	*src_ptr = from + part;
+
+	return 0;
+}
+
+/*
+ * Read fewer than QWORD_BYTES bytes from an arbitrary device address by
+ * fetching the enclosing 64-bit word from the ASE window and copying the
+ * requested bytes out of it. The caller must make sure the bytes do not
+ * cross that word.
+ *
+ * Returns 0 on success (also for count == 0), -EINVAL on a NULL ctx or
+ * count >= QWORD_BYTES.
+ */
+static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+	uint64_t host_addr, uint32_t count)
+{
+	uintptr_t host = (uintptr_t)host_addr;  /* transfer to pointer size */
+	uint64_t byte_off;
+	uint64_t win_off;
+	uint64_t qword;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr,
+		dev_addr, count);
+
+	if (ctx == NULL || count >= QWORD_BYTES)
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	switch_ase_page(ctx, dev_addr);
+
+	byte_off = dev_addr % QWORD_BYTES;
+	win_off = (dev_addr - byte_off) & DMA_ASE_WINDOW_MASK;
+
+	qword = rte_read64(ctx->ase_data_addr + win_off);
+	rte_memcpy((void *)host, (char *)&qword + byte_off, count);
+
+	return 0;
+}
+
+/*
+ * Copy the DWORD/QWORD-aligned part of FPGA memory to a host buffer
+ * through the ASE window: an optional leading DWORD to reach QWORD
+ * alignment, then 64-bit blocks split at window boundaries, then an
+ * optional trailing DWORD.
+ *
+ * On success *src_ptr and *dst_ptr are advanced past the bytes read and
+ * *count holds the number of bytes (fewer than DWORD_BYTES) left for
+ * the caller.
+ *
+ * Returns 0 on success, -EINVAL on a NULL ctx or a source that is not
+ * DWORD aligned.
+ */
+static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+	uint64_t *dst_ptr, uint64_t *count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t align_bytes = *count;
+	uint64_t offset = 0;
+	uint64_t left_in_page = 0;
+	uint64_t size_to_copy = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")", dst, src,
+		align_bytes);
+
+	if (!ctx || !IS_ALIGNED_DWORD(src))
+		return -EINVAL;
+
+	if (align_bytes < DWORD_BYTES)
+		return 0;
+
+	if (!IS_ALIGNED_QWORD(src)) {
+		/* Read a single DWORD to get QWORD aligned */
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		*(uint32_t *)(uintptr_t)dst =
+			rte_read32(ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/*
+	 * No early return when nothing is left: the cursors below must
+	 * still be handed back so the caller does not repeat the DWORD.
+	 */
+
+	/* Read blocks of 64-bit values */
+	while (align_bytes >= QWORD_BYTES) {
+		/* room left in the window that contains src, per iteration */
+		left_in_page = DMA_ASE_WINDOW;
+		left_in_page -= src & DMA_ASE_WINDOW_MASK;
+		size_to_copy =
+			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+		if (size_to_copy < QWORD_BYTES)
+			break;
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
+			(uint64_t *)(uintptr_t)dst, size_to_copy);
+		src += size_to_copy;
+		dst += size_to_copy;
+		align_bytes -= size_to_copy;
+	}
+
+	if (align_bytes >= DWORD_BYTES) {
+		/* Read remaining DWORD */
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		*(uint32_t *)(uintptr_t)dst =
+			rte_read32(ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	*src_ptr = src;
+	*dst_ptr = dst;
+	*count = align_bytes;
+
+	return 0;
+}
+
+/*
+ * Copy count bytes from FPGA memory to host memory through the ASE
+ * window: unaligned head bytes, then the aligned bulk through
+ * ase_read(), then the unaligned tail bytes.
+ *
+ * On success *src_ptr and *dst_ptr are advanced past the copied data.
+ * Returns 0 on success or the error of the failing helper.
+ */
+static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+	uint64_t *dst_ptr, uint64_t count)
+{
+	uint64_t from = *src_ptr;
+	uint64_t to = *dst_ptr;
+	uint64_t remaining = count;
+	uint64_t part;
+	int rc;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", from, to,
+		count);
+
+	/* Aligns address to 8 byte using src masking method */
+	if (!(IS_ALIGNED_DWORD(from) || IS_ALIGNED_QWORD(from))) {
+		part = QWORD_BYTES - (from % QWORD_BYTES);
+		if (part > remaining)
+			part = remaining;
+		rc = ase_read_unaligned(ctx, from, to, part);
+		if (rc != 0)
+			return rc;
+		remaining -= part;
+		to += part;
+		from += part;
+	}
+
+	/* Handles 8/4 byte MMIO transfer */
+	rc = ase_read(ctx, &from, &to, &remaining);
+	if (rc != 0)
+		return rc;
+
+	/* Left over unaligned bytes transferred using src masking method */
+	part = QWORD_BYTES - (from % QWORD_BYTES);
+	if (part > remaining)
+		part = remaining;
+
+	rc = ase_read_unaligned(ctx, from, to, part);
+	if (rc != 0)
+		return rc;
+
+	*dst_ptr = to + part;
+	*src_ptr = from + part;
+
+	return 0;
+}
+
+/* Acknowledge a DMA interrupt. A NULL ctx is ignored. */
+static void clear_interrupt(struct dma_afu_ctx *ctx)
+{
+	msgdma_status ack;
+
+	if (ctx == NULL)
+		return;
+
+	/* clear interrupt by writing 1 to IRQ bit in status register */
+	ack.csr = 0;
+	ack.irq = 1;
+	rte_write32(ack.csr, CSR_STATUS(ctx->csr_addr));
+}
+
+/*
+ * Wait up to DMA_TIMEOUT_MSEC for the DMA interrupt event fd to become
+ * readable, consume the event counter and acknowledge the interrupt in
+ * the device.
+ *
+ * Returns 0 on success, -EINVAL on a NULL ctx or invalid event fd,
+ * -EFAULT if poll() fails, -ETIMEDOUT if no event arrived in time and
+ * -EIO if the event counter cannot be read. The interrupt is cleared on
+ * every path that got past the argument check.
+ */
+static int poll_interrupt(struct dma_afu_ctx *ctx)
+{
+	struct pollfd pfd = {0};
+	uint64_t count = 0;
+	ssize_t bytes_read = 0;
+	int poll_ret = 0;
+	int ret = 0;
+
+	if (!ctx || (ctx->event_fd < 0))
+		return -EINVAL;
+
+	pfd.fd = ctx->event_fd;
+	pfd.events = POLLIN;
+	poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
+	if (poll_ret < 0) {
+		AFU_MF_PMD_ERR("Error %s", strerror(errno));
+		ret = -EFAULT;
+	} else if (poll_ret == 0) {
+		AFU_MF_PMD_ERR("Timeout");
+		ret = -ETIMEDOUT;
+	} else {
+		bytes_read = read(pfd.fd, &count, sizeof(count));
+		if (bytes_read > 0) {
+			if (ctx->verbose)
+				AFU_MF_PMD_DEBUG("Successful, ret %d, cnt %"PRIu64,
+					poll_ret, count);
+			ret = 0;
+		} else {
+			/* read() returns -1 with errno set, or 0 when no data */
+			AFU_MF_PMD_ERR("Failed %s", bytes_read < 0 ?
+				strerror(errno) : "zero bytes read");
+			ret = -EIO;
+		}
+	}
+
+	clear_interrupt(ctx);
+	return ret;
+}
+
+/*
+ * Post one descriptor to the DMA engine: optionally dump it in verbose
+ * mode, busy-wait until the status register no longer reports a full
+ * descriptor buffer, then write the descriptor with 64-bit stores.
+ *
+ * NOTE(review): desc is dereferenced without a NULL check, and the wait
+ * loop has no upper bound; it only logs every 100000000 polls.
+ */
+static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc)
+{
+	msgdma_status status;
+	uint64_t fpga_queue_full = 0;
+
+	if (!ctx)
+		return;
+
+	if (ctx->verbose) {
+		AFU_MF_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
+			desc->rd_address_ext, desc->rd_address);
+		AFU_MF_PMD_DEBUG("descriptor.wr_address = 0x%x%08x",
+			desc->wr_address_ext, desc->wr_address);
+		AFU_MF_PMD_DEBUG("descriptor.len = %u", desc->len);
+		AFU_MF_PMD_DEBUG("descriptor.wr_burst_count = %u",
+			desc->wr_burst_count);
+		AFU_MF_PMD_DEBUG("descriptor.rd_burst_count = %u",
+			desc->rd_burst_count);
+		AFU_MF_PMD_DEBUG("descriptor.wr_stride %u", desc->wr_stride);
+		AFU_MF_PMD_DEBUG("descriptor.rd_stride %u", desc->rd_stride);
+	}
+
+	/* spin while the descriptor buffer is reported full */
+	do {
+		status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
+		if (fpga_queue_full++ > 100000000) {
+			AFU_MF_PMD_DEBUG("DMA queue full retry");
+			fpga_queue_full = 0;
+		}
+	} while (status.desc_buf_full);
+
+	/* blk_write64() silently skips the write if address/size are unaligned */
+	blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
+		sizeof(*desc));
+}
+
+/*
+ * Build and post the descriptor(s) for one DMA transfer of count bytes
+ * from src to dst.
+ *
+ * FPGA_TO_FPGA transfers use a single descriptor. Transfers that involve
+ * host memory are split in up to three descriptors: a short one up to
+ * the next 4CL (CCIP_ALIGN_BYTES) boundary of the host address, a bulk
+ * one covering whole 4CL units and a short one for the remainder.
+ * When intr_en is set, only the last descriptor posted requests an
+ * interrupt.
+ *
+ * Returns 0 on success, -EINVAL on a NULL ctx or when src, dst or count
+ * is not DMA aligned.
+ */
+static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	int count, int is_last_desc, fpga_dma_type type, int intr_en)
+{
+	msgdma_ext_desc *desc = NULL;
+	int alignment_offset = 0;
+	int segment_size = 0;
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* src, dst and count must be 64-byte aligned */
+	if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
+		!IS_DMA_ALIGNED(count))
+		return -EINVAL;
+	memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
+
+	/* these fields are fixed for all DMA transfers */
+	desc = ctx->desc_buf;
+	desc->seq_num = 0;
+	desc->wr_stride = 1;
+	desc->rd_stride = 1;
+	desc->control.go = 1;
+	if (intr_en)
+		desc->control.transfer_irq_en = 1;
+	else
+		desc->control.transfer_irq_en = 0;
+
+	if (!is_last_desc)
+		desc->control.early_done_en = 1;
+	else
+		desc->control.early_done_en = 0;
+
+	if (type == FPGA_TO_FPGA) {
+		desc->rd_address = src & DMA_MASK_32_BIT;
+		desc->wr_address = dst & DMA_MASK_32_BIT;
+		desc->len = count;
+		desc->wr_burst_count = 4;
+		desc->rd_burst_count = 4;
+		desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+		desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+		send_descriptor(ctx, desc);
+	} else {
+		/* check CCIP (host) address is aligned to 4CL (256B) */
+		alignment_offset = (type == HOST_TO_FPGA)
+			? (src % CCIP_ALIGN_BYTES) : (dst % CCIP_ALIGN_BYTES);
+		/* performing a short transfer to get aligned */
+		if (alignment_offset != 0) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->wr_burst_count = 1;
+			desc->rd_burst_count = 1;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			/* count isn't large enough to hit next 4CL boundary */
+			if ((CCIP_ALIGN_BYTES - alignment_offset) >= count) {
+				segment_size = count;
+				count = 0;
+			} else {
+				segment_size = CCIP_ALIGN_BYTES
+					- alignment_offset;
+				src += segment_size;
+				dst += segment_size;
+				count -= segment_size;
+				/* more descriptors follow: no interrupt yet */
+				desc->control.transfer_irq_en = 0;
+			}
+			/* post short transfer to align to a 4CL (256 byte) */
+			desc->len = segment_size;
+			send_descriptor(ctx, desc);
+		}
+		/* at this point we are 4CL (256 byte) aligned */
+		if (count >= CCIP_ALIGN_BYTES) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->wr_burst_count = 4;
+			desc->rd_burst_count = 4;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			/* buffer ends on 4CL boundary */
+			if ((count % CCIP_ALIGN_BYTES) == 0) {
+				segment_size = count;
+				count = 0;
+				/*
+				 * This is the final descriptor: restore the
+				 * interrupt request that the leading short
+				 * transfer may have cleared.
+				 */
+				if (intr_en)
+					desc->control.transfer_irq_en = 1;
+			} else {
+				segment_size = count
+					- (count % CCIP_ALIGN_BYTES);
+				src += segment_size;
+				dst += segment_size;
+				count -= segment_size;
+				/* the remainder follows: no interrupt yet */
+				desc->control.transfer_irq_en = 0;
+			}
+			desc->len = segment_size;
+			send_descriptor(ctx, desc);
+		}
+		/* post short transfer to handle the remainder */
+		if (count > 0) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->len = count;
+			desc->wr_burst_count = 1;
+			desc->rd_burst_count = 1;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			if (intr_en)
+				desc->control.transfer_irq_en = 1;
+			send_descriptor(ctx, desc);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Clear the host magic word and post a 64-byte, interrupt-enabled
+ * FPGA-to-host transfer from DMA_WF_MAGIC_ROM into it. Completion is
+ * observed with wait_magic().
+ *
+ * Returns the result of do_dma(), or -EINVAL on a NULL ctx.
+ */
+static int issue_magic(struct dma_afu_ctx *ctx)
+{
+	/* ctx is dereferenced below, so it must be checked here */
+	if (!ctx)
+		return -EINVAL;
+
+	*(ctx->magic_buf) = 0ULL;
+	return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
+		DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
+}
+
+/*
+ * Wait for the transfer posted by issue_magic(): wait for the interrupt,
+ * then re-check the magic word a bounded number of times until it holds
+ * DMA_WF_MAGIC. The magic word is cleared before returning.
+ */
+static void wait_magic(struct dma_afu_ctx *ctx)
+{
+	int spins = 0;
+
+	if (ctx == NULL)
+		return;
+
+	poll_interrupt(ctx);
+
+	for (;;) {
+		if (*(ctx->magic_buf) == DMA_WF_MAGIC)
+			break;
+		if (spins++ > 1000) {
+			AFU_MF_PMD_ERR("DMA magic operation timeout");
+			break;
+		}
+	}
+
+	*(ctx->magic_buf) = 0ULL;
+}
+
+/*
+ * Send chunk number chunk of a host-to-FPGA transfer: copy it into the
+ * bounce buffer selected by chunk % NUM_DMA_BUF and post the DMA.
+ * An interrupt is requested on the last chunk of every half of the
+ * bounce buffers and on the final chunk; a previously requested
+ * interrupt is waited for before a new one is requested.
+ *
+ * *intr_issued is set to 1 whenever an interrupt was requested.
+ * Returns 0 on success, -EINVAL on NULL arguments, or the error of
+ * poll_interrupt()/do_dma().
+ */
+static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	uint64_t chunk, int is_last_chunk, int *intr_issued)
+{
+	uint64_t slot;
+	int want_irq;
+	int rc;
+
+	if (ctx == NULL || intr_issued == NULL)
+		return -EINVAL;
+
+	src += chunk * ctx->dma_buf_size;
+	dst += chunk * ctx->dma_buf_size;
+
+	want_irq = ((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1)) ||
+		is_last_chunk;
+	if (want_irq && *intr_issued) {
+		rc = poll_interrupt(ctx);
+		if (rc != 0)
+			return rc;
+	}
+
+	slot = chunk % NUM_DMA_BUF;
+	rte_memcpy(ctx->dma_buf[slot], (void *)(uintptr_t)src,
+		ctx->dma_buf_size);
+	rc = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[slot]),
+			ctx->dma_buf_size, 0, HOST_TO_FPGA, want_irq);
+	if (want_irq)
+		*intr_issued = 1;
+
+	return rc;
+}
+
+/*
+ * Copy count bytes from host memory (src) to FPGA memory (dst).
+ *
+ * Bytes before the first DMA-aligned destination address go through the
+ * ASE window, whole bounce-buffer sized chunks and the following
+ * DMA_ALIGN_BYTES multiples go through DMA, and the remaining tail goes
+ * through the ASE window again.
+ *
+ * Returns 0 on success, -EINVAL on a NULL ctx, or the error of the
+ * failing helper.
+ */
+static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t aligned_addr = 0;
+	uint64_t align_bytes = 0;
+	uint64_t dma_chunks = 0;
+	uint64_t dma_tx_bytes = 0;
+	uint64_t offset = 0;
+	int issued_intr = 0;
+	int ret = 0;
+
+	/* the length is printed in hex to match its 0x prefix */
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%zx)", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	if (!IS_DMA_ALIGNED(dst)) {
+		if (count_left < DMA_ALIGN_BYTES)
+			return ase_host_to_fpga(ctx, &dst, &src, count_left);
+
+		/* ase_host_to_fpga() advances dst and src past the head */
+		aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
+			* DMA_ALIGN_BYTES;
+		align_bytes = aligned_addr - dst;
+		ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
+		if (ret)
+			return ret;
+		count_left = count_left - align_bytes;
+	}
+
+	if (count_left) {
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = dma_tx_buf(ctx, dst, src, i,
+				i == (dma_chunks - 1), &issued_intr);
+			if (ret)
+				return ret;
+		}
+
+		/* wait for the last interrupt requested by dma_tx_buf() */
+		if (issued_intr) {
+			ret = poll_interrupt(ctx);
+			if (ret)
+				return ret;
+		}
+
+		if (count_left) {
+			/* DMA the part that is a multiple of DMA_ALIGN_BYTES */
+			i = count_left / DMA_ALIGN_BYTES;
+			if (i > 0) {
+				dma_tx_bytes = i * DMA_ALIGN_BYTES;
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+					dma_tx_bytes);
+				rte_memcpy(ctx->dma_buf[0],
+					(void *)(uintptr_t)(src + offset),
+					dma_tx_bytes);
+				ret = do_dma(ctx, dst + offset,
+					DMA_HOST_ADDR(ctx->dma_iova[0]),
+					dma_tx_bytes, 1, HOST_TO_FPGA, 1);
+				if (ret)
+					return ret;
+				ret = poll_interrupt(ctx);
+				if (ret)
+					return ret;
+			}
+
+			/* whatever is left is smaller than DMA_ALIGN_BYTES */
+			count_left -= dma_tx_bytes;
+			if (count_left) {
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+					count_left);
+				dst += offset + dma_tx_bytes;
+				src += offset + dma_tx_bytes;
+				ret = ase_host_to_fpga(ctx, &dst, &src,
+					count_left);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Receive chunk number chunk of an FPGA-to-host transfer: post the DMA
+ * into the bounce buffer selected by chunk % NUM_DMA_BUF and, depending
+ * on how many chunks are pending, issue a magic write fence and drain
+ * HALF_DMA_BUF completed bounce buffers into the destination.
+ *
+ * *rx_count is the number of chunks already copied to dst and is
+ * advanced when buffers are drained; *wf_issued tells whether a magic
+ * write fence is outstanding.
+ *
+ * Returns 0 on success, -EINVAL on NULL arguments, or the error of
+ * do_dma()/issue_magic().
+ */
+static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
+{
+	uint64_t i = chunk % NUM_DMA_BUF;
+	uint64_t n = 0;
+	uint64_t num_pending = 0;
+	int ret = 0;
+
+	/* validate every pointer before it is dereferenced */
+	if (!ctx || !rx_count || !wf_issued)
+		return -EINVAL;
+
+	n = *rx_count;
+
+	ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
+		src + chunk * ctx->dma_buf_size,
+		ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
+	if (ret)
+		return ret;
+
+	/* half of the bounce buffers are in flight: fence them */
+	num_pending = chunk - n + 1;
+	if (num_pending == HALF_DMA_BUF) {
+		ret = issue_magic(ctx);
+		if (ret) {
+			AFU_MF_PMD_DEBUG("Magic issue failed");
+			return ret;
+		}
+		*wf_issued = 1;
+	}
+
+	/* all bounce buffers in use, or last chunk: drain the fenced half */
+	if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
+		if (*wf_issued) {
+			wait_magic(ctx);
+			for (i = 0; i < HALF_DMA_BUF; i++) {
+				rte_memcpy((void *)(uintptr_t)(dst +
+						n * ctx->dma_buf_size),
+					ctx->dma_buf[n % NUM_DMA_BUF],
+					ctx->dma_buf_size);
+				n++;
+			}
+			*wf_issued = 0;
+			*rx_count = n;
+		}
+		ret = issue_magic(ctx);
+		if (ret) {
+			AFU_MF_PMD_DEBUG("Magic issue failed");
+			return ret;
+		}
+		*wf_issued = 1;
+	}
+
+	return ret;
+}
+
+/*
+ * Copy count bytes from FPGA memory (src) to host memory (dst).
+ *
+ * Bytes before the first DMA-aligned source address go through the ASE
+ * window, whole bounce-buffer sized chunks and the following
+ * DMA_ALIGN_BYTES multiples go through DMA, and the remaining tail goes
+ * through the ASE window again.
+ *
+ * Returns 0 on success, -EINVAL on a NULL ctx, or the error of the
+ * failing helper.
+ */
+static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t aligned_addr = 0;
+	uint64_t align_bytes = 0;
+	uint64_t dma_chunks = 0;
+	uint64_t pending_buf = 0;
+	uint64_t dma_rx_bytes = 0;
+	uint64_t offset = 0;
+	int wf_issued = 0;
+	int ret = 0;
+
+	/* the length is printed in hex to match its 0x prefix */
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%zx)", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	if (!IS_DMA_ALIGNED(src)) {
+		if (count_left < DMA_ALIGN_BYTES)
+			return ase_fpga_to_host(ctx, &src, &dst, count_left);
+
+		/* ase_fpga_to_host() advances src and dst past the head */
+		aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
+			 * DMA_ALIGN_BYTES;
+		align_bytes = aligned_addr - src;
+		ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
+		if (ret)
+			return ret;
+		count_left = count_left - align_bytes;
+	}
+
+	if (count_left) {
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = dma_rx_buf(ctx, dst, src, i,
+				i == (dma_chunks - 1),
+				&pending_buf, &wf_issued);
+			if (ret)
+				return ret;
+		}
+
+		/* wait for the last fence issued by dma_rx_buf() */
+		if (wf_issued)
+			wait_magic(ctx);
+
+		/* clear out final dma memcpy operations */
+		while (pending_buf < dma_chunks) {
+			/* constant size transfer; no length check required */
+			rte_memcpy((void *)(uintptr_t)(dst +
+					pending_buf * ctx->dma_buf_size),
+				ctx->dma_buf[pending_buf % NUM_DMA_BUF],
+				ctx->dma_buf_size);
+			pending_buf++;
+		}
+
+		if (count_left > 0) {
+			/* DMA the part that is a multiple of DMA_ALIGN_BYTES */
+			i = count_left / DMA_ALIGN_BYTES;
+			if (i > 0) {
+				dma_rx_bytes = i * DMA_ALIGN_BYTES;
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+					dma_rx_bytes);
+				ret = do_dma(ctx,
+					DMA_HOST_ADDR(ctx->dma_iova[0]),
+					src + offset,
+					dma_rx_bytes, 1, FPGA_TO_HOST, 0);
+				if (ret)
+					return ret;
+				ret = issue_magic(ctx);
+				if (ret)
+					return ret;
+				wait_magic(ctx);
+				rte_memcpy((void *)(uintptr_t)(dst + offset),
+					ctx->dma_buf[0], dma_rx_bytes);
+			}
+
+			/* whatever is left is smaller than DMA_ALIGN_BYTES */
+			count_left -= dma_rx_bytes;
+			if (count_left) {
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+					count_left);
+				dst += offset + dma_rx_bytes;
+				src += offset + dma_rx_bytes;
+				ret = ase_fpga_to_host(ctx, &src, &dst,
+							count_left);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Copy count bytes between two FPGA memory addresses.
+ *
+ * When dst, src and count are all DMA aligned the copy is done by the
+ * DMA engine directly, fenced with a magic write after every
+ * NUM_DMA_BUF chunks. Otherwise the data is staged through a temporary
+ * host buffer, one bounce-buffer sized chunk at a time.
+ *
+ * Returns 0 on success, -EINVAL on a NULL ctx or when the destination
+ * starts inside the source range, -ENOMEM if the staging buffer cannot
+ * be allocated, or the error of the failing helper.
+ */
+static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t dma_chunks = 0;
+	uint64_t offset = 0;
+	uint32_t tx_chunks = 0;
+	uint64_t *tmp_buf = NULL;
+	int ret = 0;
+
+	/* the length is printed in hex to match its 0x prefix */
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (0x%zx)", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
+	    && IS_DMA_ALIGNED(count_left)) {
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
+				src + i * ctx->dma_buf_size,
+				ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
+			if (ret)
+				return ret;
+			/* fence after every NUM_DMA_BUF chunks and at the end */
+			if ((((i + 1) % NUM_DMA_BUF) == 0) ||
+				(i == (dma_chunks - 1))) {
+				ret = issue_magic(ctx);
+				if (ret)
+					return ret;
+				wait_magic(ctx);
+			}
+		}
+
+		if (count_left > 0) {
+			AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA", count_left);
+			ret = do_dma(ctx, dst + offset, src + offset,
+				count_left, 1, FPGA_TO_FPGA, 0);
+			if (ret)
+				return ret;
+			ret = issue_magic(ctx);
+			if (ret)
+				return ret;
+			wait_magic(ctx);
+		}
+	} else {
+		/* a forward chunked copy would overwrite unread source data */
+		if ((src < dst) && (src + count_left > dst)) {
+			AFU_MF_PMD_ERR("Overlapping: 0x%"PRIx64
+				" -> 0x%"PRIx64" (0x%"PRIx64")",
+				src, dst, count_left);
+			return -EINVAL;
+		}
+		tx_chunks = count_left / ctx->dma_buf_size;
+		offset = tx_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
+			" (%u...0x%"PRIx64")",
+			src, dst, tx_chunks, count_left);
+		tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
+			DMA_ALIGN_BYTES);
+		/* the staging buffer is written right below: it must exist */
+		if (!tmp_buf)
+			return -ENOMEM;
+		for (i = 0; i < tx_chunks; i++) {
+			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
+				src + i * ctx->dma_buf_size,
+				ctx->dma_buf_size);
+			if (ret)
+				goto free_buf;
+			ret = dma_host_to_fpga(ctx,
+				dst + i * ctx->dma_buf_size,
+				(uint64_t)tmp_buf, ctx->dma_buf_size);
+			if (ret)
+				goto free_buf;
+		}
+
+		if (count_left > 0) {
+			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
+				src + offset, count_left);
+			if (ret)
+				goto free_buf;
+			ret = dma_host_to_fpga(ctx, dst + offset,
+				(uint64_t)tmp_buf, count_left);
+			if (ret)
+				goto free_buf;
+		}
+free_buf:
+		rte_free(tmp_buf);
+	}
+
+	return ret;
+}
+
+/*
+ * Run one synchronous transfer of count bytes from src to dst in the
+ * direction selected by type.
+ *
+ * Returns the result of the direction-specific routine, or -EINVAL on a
+ * NULL ctx or an unknown transfer type.
+ */
+static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
+	uint64_t src, size_t count, fpga_dma_type type)
+{
+	if (ctx == NULL)
+		return -EINVAL;
+
+	switch (type) {
+	case HOST_TO_FPGA:
+		return dma_host_to_fpga(ctx, dst, src, count);
+	case FPGA_TO_HOST:
+		return dma_fpga_to_host(ctx, dst, src, count);
+	case FPGA_TO_FPGA:
+		return dma_fpga_to_fpga(ctx, dst, src, count);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Return the time elapsed between start and end, in seconds.
+ *
+ * The difference is computed in 64-bit arithmetic so the nanosecond
+ * product cannot overflow on targets where long is 32 bits wide.
+ */
+static double getTime(struct timespec start, struct timespec end)
+{
+	int64_t sec = (int64_t)end.tv_sec - (int64_t)start.tv_sec;
+	int64_t nsec = (int64_t)end.tv_nsec - (int64_t)start.tv_nsec;
+	int64_t diff = sec * INT64_C(1000000000) + nsec;
+
+	return (double)diff / 1e9;
+}
+
+/* number of timed repetitions per direction in sweep_test() */
+#define SWEEP_ITERS 1
+/*
+ * Write a window of the data buffer to FPGA memory at ddr_offset, read
+ * it back into the same window, print the measured bandwidth of both
+ * directions and verify the data buffer against the reference buffer.
+ *
+ * The window starts buf_offset bytes into the data buffer and is
+ * length - (buf_offset + size_decrement) bytes long.
+ *
+ * Returns the result of dma_afu_buf_verify(), -EINVAL on missing
+ * buffers or out-of-range parameters, or the error of a failed
+ * transfer.
+ */
+static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
+	uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
+{
+	struct timespec t0, t1;
+	uint64_t *host_buf;
+	uint64_t xfer_len;
+	double elapsed;
+	double mbps;
+	int iter;
+	int rc;
+
+	if (ctx == NULL || ctx->data_buf == NULL || ctx->ref_buf == NULL) {
+		AFU_MF_PMD_ERR("Buffer for DMA test is not allocated");
+		return -EINVAL;
+	}
+
+	if ((buf_offset + size_decrement) > length) {
+		AFU_MF_PMD_ERR("Test length does not match unaligned parameter");
+		return -EINVAL;
+	}
+
+	xfer_len = length - (buf_offset + size_decrement);
+	if ((ddr_offset + xfer_len) > ctx->mem_size) {
+		AFU_MF_PMD_ERR("Test is out of DDR memory space");
+		return -EINVAL;
+	}
+
+	host_buf = (uint64_t *)((uint8_t *)ctx->data_buf + buf_offset);
+
+	/* host -> FPGA */
+	printf("Sweep Host %p to FPGA 0x%"PRIx64
+		" with 0x%"PRIx64" bytes ...\n",
+		(void *)host_buf, ddr_offset, xfer_len);
+
+	elapsed = 0.0;
+	for (iter = 0; iter < SWEEP_ITERS; iter++) {
+		clock_gettime(CLOCK_MONOTONIC, &t0);
+		rc = dma_transfer_sync(ctx, ddr_offset, (uint64_t)host_buf,
+			xfer_len, HOST_TO_FPGA);
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		if (rc != 0) {
+			AFU_MF_PMD_ERR("Failed");
+			return rc;
+		}
+		elapsed += getTime(t0, t1);
+	}
+	mbps = (xfer_len * SWEEP_ITERS) / (elapsed * 1000000);
+	printf("Measured bandwidth = %lf MB/s\n", mbps);
+
+	/* FPGA -> host, into the zeroed window */
+	printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n",
+		ddr_offset, (void *)host_buf, xfer_len);
+
+	memset((char *)host_buf, 0, xfer_len);
+	elapsed = 0.0;
+	for (iter = 0; iter < SWEEP_ITERS; iter++) {
+		clock_gettime(CLOCK_MONOTONIC, &t0);
+		rc = dma_transfer_sync(ctx, (uint64_t)host_buf, ddr_offset,
+			xfer_len, FPGA_TO_HOST);
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		if (rc != 0) {
+			AFU_MF_PMD_ERR("Failed");
+			return rc;
+		}
+		elapsed += getTime(t0, t1);
+	}
+	mbps = (xfer_len * SWEEP_ITERS) / (elapsed * 1000000);
+	printf("Measured bandwidth = %lf MB/s\n", mbps);
+
+	printf("Verifying buffer ...\n");
+	return dma_afu_buf_verify(ctx, xfer_len);
+}
+
+/*
+ * Run the DMA self test described by priv->dma_cfg on the selected DMA
+ * controller: host -> FPGA -> host round trip with verification, an
+ * FPGA -> FPGA copy read back and verified, then the aligned sweep test and,
+ * when cfg->unaligned is set, a series of unaligned sweep tests.
+ *
+ * @param dev  AFU device; dev->priv must hold a configured n3000_afu_priv
+ * @return 0 on success, -EINVAL/-ENOENT on invalid arguments, otherwise the
+ *         first error reported by a sub-step
+ */
+static int dma_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	struct rte_pmd_afu_dma_cfg *cfg = NULL;
+	msgdma_ctrl ctrl;
+	uint64_t host_addr = 0;
+	uint64_t base = 0;
+	uint64_t len = 0;
+	uint64_t offset = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->dma_cfg;
+	if (cfg->index >= NUM_N3000_DMA)
+		return -EINVAL;
+	ctx = &priv->dma_ctx[cfg->index];
+
+	ctx->pattern = (int)cfg->pattern;
+	ctx->verbose = (int)cfg->verbose;
+	ctx->dma_buf_size = cfg->size;
+
+	ret = dma_afu_buf_alloc(ctx, cfg);
+	if (ret)
+		goto free;
+
+	printf("Initialize test buffer\n");
+	dma_afu_buf_init(ctx, cfg->length);
+
+	/*
+	 * Convert through uintptr_t so the cast is valid when pointers are
+	 * narrower than 64 bits.
+	 */
+	host_addr = (uint64_t)(uintptr_t)ctx->data_buf;
+	/* widen once so the range arithmetic below cannot wrap at 32 bits */
+	base = cfg->offset;
+	len = cfg->length;
+
+	/* enable interrupt */
+	ctrl.csr = 0;
+	ctrl.global_intr_en_mask = 1;
+	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+	printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf,
+		cfg->offset, cfg->length);
+	ret = dma_transfer_sync(ctx, cfg->offset, host_addr,
+		cfg->length, HOST_TO_FPGA);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from host to FPGA");
+		goto end;
+	}
+	memset(ctx->data_buf, 0, cfg->length);
+
+	printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
+		ctx->data_buf, cfg->length);
+	ret = dma_transfer_sync(ctx, host_addr, cfg->offset,
+		cfg->length, FPGA_TO_HOST);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+		goto end;
+	}
+	ret = dma_afu_buf_verify(ctx, cfg->length);
+	if (ret)
+		goto end;
+
+	/*
+	 * Pick a second, non-overlapping region of the same size: directly
+	 * after the first one if it fits, else at the start of memory.  The
+	 * sum is computed in 64 bits; the former 32-bit expression
+	 * "offset + length * 2" could wrap and select a region past mem_size.
+	 */
+	if ((base + len * 2) <= ctx->mem_size)
+		offset = base + len;
+	else if (base > len)
+		offset = 0;
+	else
+		/* NOTE(review): this also skips the sweep tests - confirm intent */
+		goto end;
+
+	printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
+		cfg->offset, offset, cfg->length);
+	ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length,
+		FPGA_TO_FPGA);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to FPGA");
+		goto end;
+	}
+
+	printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
+		ctx->data_buf, cfg->length);
+	ret = dma_transfer_sync(ctx, host_addr, offset,
+		cfg->length, FPGA_TO_HOST);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+		goto end;
+	}
+	ret = dma_afu_buf_verify(ctx, cfg->length);
+	if (ret)
+		goto end;
+
+	printf("Sweep with aligned address and size\n");
+	ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
+	if (ret)
+		goto end;
+
+	if (cfg->unaligned) {
+		printf("Sweep with unaligned address and size\n");
+		/* {host buffer offset, size decrement} pairs to exercise */
+		struct unaligned_set {
+			uint64_t addr_offset;
+			uint64_t size_dec;
+		} param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
+		for (i = 0; i < ARRAY_SIZE(param); i++) {
+			ret = sweep_test(ctx, cfg->length, cfg->offset,
+				param[i].addr_offset, param[i].size_dec);
+			if (ret)
+				break;
+		}
+	}
+
+end:
+	/* disable interrupt */
+	ctrl.global_intr_en_mask = 0;
+	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+free:
+	dma_afu_buf_free(ctx);
+	return ret;
+}
+
+/*
+ * Resolve the PCI device behind an AFU raw device.
+ *
+ * The raw device's generic device is converted to its AFU device, whose own
+ * rawdev->device is then converted to the PCI device.  Returns NULL when any
+ * link of that chain is missing.
+ */
+static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_mf_rawdev *dev)
+{
+	struct rte_afu_device *afu = NULL;
+
+	if (dev == NULL)
+		return NULL;
+	if (dev->rawdev == NULL || dev->rawdev->device == NULL)
+		return NULL;
+
+	afu = RTE_DEV_TO_AFU(dev->rawdev->device);
+	if (afu->rawdev != NULL && afu->rawdev->device != NULL)
+		return RTE_DEV_TO_PCI(afu->rawdev->device);
+
+	return NULL;
+}
+
+#ifdef VFIO_PRESENT
+/*
+ * Bind a range of MSI-X vectors of the underlying PCI device to eventfds
+ * through the VFIO_DEVICE_SET_IRQS ioctl.
+ *
+ * @param dev        AFU device
+ * @param vec_start  first MSI-X vector to configure
+ * @param count      number of vectors, 1..MAX_MSIX_VEC
+ * @param efds       array of count eventfd descriptors
+ * @return 0 on success, -EINVAL on bad arguments, -ENODEV when the PCI
+ *         device or its VFIO device fd is unavailable, -ENOMEM on allocation
+ *         failure, otherwise the (non-zero) ioctl return value
+ */
+static int dma_afu_set_irqs(struct afu_mf_rawdev *dev, uint32_t vec_start,
+	uint32_t count, int *efds)
+{
+	struct rte_pci_device *pci_dev = NULL;
+	struct vfio_irq_set *irq_set = NULL;
+	int vfio_dev_fd = 0;
+	size_t sz = 0;
+	int ret = 0;
+
+	if (!dev || !efds || (count == 0) || (count > MAX_MSIX_VEC))
+		return -EINVAL;
+
+	pci_dev = n3000_afu_get_pci_dev(dev);
+	if (!pci_dev)
+		return -ENODEV;
+	vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
+	if (vfio_dev_fd < 0) {
+		/* no usable interrupt handle, do not issue the ioctl at all */
+		AFU_MF_PMD_ERR("VFIO device fd is not available");
+		return -ENODEV;
+	}
+
+	/* header followed by one eventfd per vector */
+	sz = sizeof(*irq_set) + sizeof(*efds) * count;
+	irq_set = rte_zmalloc(NULL, sz, 0);
+	if (!irq_set)
+		return -ENOMEM;
+
+	irq_set->argsz = (uint32_t)sz;
+	irq_set->count = count;
+	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+		VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	irq_set->start = vec_start;
+
+	rte_memcpy(&irq_set->data, efds, sizeof(*efds) * count);
+	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+	if (ret)
+		/* no trailing newline, consistent with the other log calls */
+		AFU_MF_PMD_ERR("Error enabling MSI-X interrupts");
+
+	rte_free(irq_set);
+	return ret;
+}
+#endif
+
+/*
+ * Locate the MMIO base address of the FIU port that hosts this AFU.
+ *
+ * The port attribute register for dev->port is read from BAR 0; it reports
+ * whether the port is implemented and in which BAR and at which offset it
+ * is mapped.
+ *
+ * @return the port base address, or NULL if the PCI device cannot be
+ *         resolved, a required BAR is not mapped, the port is not
+ *         implemented or the reported BAR index is out of range
+ */
+static void *n3000_afu_get_port_addr(struct afu_mf_rawdev *dev)
+{
+	struct rte_pci_device *pci_dev = NULL;
+	uint8_t *addr = NULL;
+	uint64_t val = 0;
+	uint32_t bar = 0;
+
+	pci_dev = n3000_afu_get_pci_dev(dev);
+	if (!pci_dev)
+		return NULL;
+
+	addr = (uint8_t *)pci_dev->mem_resource[0].addr;
+	if (!addr) {
+		/* reading the attribute register would dereference NULL */
+		AFU_MF_PMD_ERR("BAR 0 is not mapped");
+		return NULL;
+	}
+	val = rte_read64(addr + PORT_ATTR_REG(dev->port));
+	if (!PORT_IMPLEMENTED(val)) {
+		AFU_MF_PMD_INFO("FIU port %d is not implemented", dev->port);
+		return NULL;
+	}
+
+	bar = PORT_BAR(val);
+	if (bar >= PCI_MAX_RESOURCE) {
+		AFU_MF_PMD_ERR("BAR index %u is out of limit", bar);
+		return NULL;
+	}
+
+	addr = (uint8_t *)pci_dev->mem_resource[bar].addr;
+	if (!addr) {
+		/* avoid returning "NULL + offset" as a valid-looking pointer */
+		AFU_MF_PMD_ERR("BAR %u is not mapped", bar);
+		return NULL;
+	}
+
+	return addr + PORT_OFFSET(val);
+}
+
+/*
+ * Walk the feature header list of the port and read the interrupt
+ * capability of the private feature whose ID is PORT_FEATURE_UINT_ID.
+ *
+ * @param dev        AFU device
+ * @param vec_start  if not NULL, receives PORT_VEC_START() of the capability
+ * @param vec_count  if not NULL, receives PORT_VEC_COUNT() of the capability
+ * @return 0 when the feature is found, -ENOENT when the port address cannot
+ *         be resolved or the list ends without a match
+ */
+static int n3000_afu_get_irq_capability(struct afu_mf_rawdev *dev,
+	uint32_t *vec_start, uint32_t *vec_count)
+{
+	uint8_t *feature = (uint8_t *)n3000_afu_get_port_addr(dev);
+	uint64_t hdr = 0;
+	uint64_t cap = 0;
+	uint64_t step = 0;
+
+	if (feature == NULL)
+		return -ENOENT;
+
+	for (;;) {
+		hdr = rte_read64(feature);
+		if ((DFH_TYPE(hdr) == DFH_TYPE_PRIVATE) &&
+			(DFH_FEATURE_ID(hdr) == PORT_FEATURE_UINT_ID)) {
+			cap = rte_read64(feature + PORT_UINT_CAP_REG);
+			if (vec_start != NULL)
+				*vec_start = PORT_VEC_START(cap);
+			if (vec_count != NULL)
+				*vec_count = PORT_VEC_COUNT(cap);
+			return 0;
+		}
+
+		/* stop on an invalid/zero link or on the end-of-list flag */
+		step = DFH_NEXT_OFFSET(hdr);
+		if (((step & 0xffff) == 0xffff) || (step == 0))
+			break;
+		if (DFH_EOL(hdr))
+			break;
+
+		feature += step;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Free the DSM, source and destination buffers of the NLB context and
+ * clear the pointers that referred to them.
+ *
+ * @return 0 on success, -EINVAL if dev is NULL, -ENOENT if dev has no
+ *         private data
+ */
+static int nlb_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct nlb_afu_ctx *nlb = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	nlb = &((struct n3000_afu_priv *)dev->priv)->nlb_ctx;
+
+	/* status_ptr points into the DSM buffer, so it goes with it */
+	rte_free(nlb->dsm_ptr);
+	nlb->status_ptr = NULL;
+	nlb->dsm_ptr = NULL;
+
+	rte_free(nlb->src_ptr);
+	nlb->src_ptr = NULL;
+
+	rte_free(nlb->dest_ptr);
+	nlb->dest_ptr = NULL;
+
+	return 0;
+}
+
+/*
+ * Prepare the NLB context: record the feature address and allocate the DSM,
+ * source and destination buffers together with their IOVA addresses.
+ *
+ * @param dev   AFU device with allocated private data
+ * @param addr  MMIO address of the NLB feature
+ * @return 0 on success, -EINVAL on NULL arguments, -ENOENT without private
+ *         data, -ENOMEM if an allocation or IOVA translation fails (all
+ *         buffers are released in that case)
+ */
+static int nlb_afu_ctx_init(struct afu_mf_rawdev *dev, uint8_t *addr)
+{
+	struct nlb_afu_ctx *nlb = NULL;
+
+	if (dev == NULL || addr == NULL)
+		return -EINVAL;
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	nlb = &((struct n3000_afu_priv *)dev->priv)->nlb_ctx;
+	nlb->addr = addr;
+
+	nlb->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+	if (nlb->dsm_ptr == NULL)
+		goto no_mem;
+	nlb->dsm_iova = rte_malloc_virt2iova(nlb->dsm_ptr);
+	if (nlb->dsm_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	nlb->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (nlb->src_ptr == NULL)
+		goto no_mem;
+	nlb->src_iova = rte_malloc_virt2iova(nlb->src_ptr);
+	if (nlb->src_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	nlb->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (nlb->dest_ptr == NULL)
+		goto no_mem;
+	nlb->dest_iova = rte_malloc_virt2iova(nlb->dest_ptr);
+	if (nlb->dest_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	/* the status block sits DSM_STATUS bytes into the DSM buffer */
+	nlb->status_ptr = (struct nlb_dsm_status *)(nlb->dsm_ptr + DSM_STATUS);
+	return 0;
+
+no_mem:
+	/* every failure above is reported as out of memory */
+	nlb_afu_ctx_release(dev);
+	return -ENOMEM;
+}
+
+/*
+ * Release the resources of every DMA context of the device.
+ *
+ * Compared with releasing only dma_ctx[0], this frees the descriptor and
+ * magic buffers of all NUM_N3000_DMA contexts.  The event fd is stored in
+ * every context but is created for DMA0 only, so it is closed once, via
+ * dma_ctx[0], and only when it was actually created: private data is
+ * zero-allocated, and closing an untouched event_fd of 0 would close an
+ * unrelated descriptor.  All copies are reset to -1 afterwards so a repeated
+ * release cannot close the descriptor twice.
+ *
+ * @return 0 on success, -EINVAL if dev is NULL, -ENOENT if dev has no
+ *         private data
+ */
+static int dma_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	int i = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	/* NOTE(review): treats fd 0 as "never created" - confirm acceptable */
+	if (priv->dma_ctx[0].event_fd > 0)
+		close(priv->dma_ctx[0].event_fd);
+
+	for (i = 0; i < NUM_N3000_DMA; i++) {
+		ctx = &priv->dma_ctx[i];
+
+		rte_free(ctx->desc_buf);
+		ctx->desc_buf = NULL;
+
+		rte_free(ctx->magic_buf);
+		ctx->magic_buf = NULL;
+
+		ctx->event_fd = -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Initialise the context of one DMA controller.
+ *
+ * Register addresses are derived from the feature base address, the size of
+ * the attached memory is taken from a per-index table, and the descriptor
+ * and magic buffers are allocated.  For DMA0 an eventfd is created and, when
+ * VFIO is available, bound to the first interrupt vector of the port; the
+ * other controllers reuse the descriptor stored in DMA0's context.
+ *
+ * The event fd is kept per device (no function-level static), so several
+ * devices do not overwrite each other's descriptor, and a failure only
+ * releases what this call acquired for this context.
+ *
+ * @param dev    AFU device with allocated private data
+ * @param index  DMA controller index, 0..NUM_N3000_DMA-1
+ * @param addr   MMIO base address of the DMA feature
+ * @return 0 on success, -EINVAL on bad arguments, -ENOENT without private
+ *         data, -EBADF if the eventfd cannot be created, -ENOMEM on
+ *         allocation failure, or the error of
+ *         n3000_afu_get_irq_capability()
+ */
+static int dma_afu_ctx_init(struct afu_mf_rawdev *dev, int index, uint8_t *addr)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	/* memory size behind each DMA controller, indexed by controller */
+	static const uint64_t mem_sz[] = {
+		0x100000000, 0x100000000, 0x40000000, 0x1000000
+	};
+	uint32_t vec_start = 0;
+	int efd = -1;
+	int ret = 0;
+
+	if (!dev || (index < 0) || (index >= NUM_N3000_DMA) || !addr)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	ctx = &priv->dma_ctx[index];
+	ctx->index = index;
+	ctx->addr = addr;
+	ctx->csr_addr = addr + DMA_CSR;
+	ctx->desc_addr = addr + DMA_DESC;
+	ctx->ase_ctrl_addr = addr + DMA_ASE_CTRL;
+	ctx->ase_data_addr = addr + DMA_ASE_DATA;
+	ctx->mem_size = mem_sz[ctx->index];
+	ctx->cur_ase_page = INVALID_ASE_PAGE;
+	if (ctx->index == 0) {
+		ret = n3000_afu_get_irq_capability(dev, &vec_start, NULL);
+		if (ret)
+			return ret;
+
+		efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+		if (efd < 0) {
+			AFU_MF_PMD_ERR("eventfd create failed");
+			return -EBADF;
+		}
+#ifdef VFIO_PRESENT
+		/* best effort: a failure is logged but does not abort init */
+		if (dma_afu_set_irqs(dev, vec_start, 1, &efd))
+			AFU_MF_PMD_ERR("DMA interrupt setup failed");
+#endif
+		ctx->event_fd = efd;
+	} else {
+		/* share the descriptor owned by DMA0 of this device */
+		ctx->event_fd = priv->dma_ctx[0].event_fd;
+	}
+
+	ctx->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
+		sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
+	if (!ctx->desc_buf) {
+		ret = -ENOMEM;
+		goto release;
+	}
+
+	ctx->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (!ctx->magic_buf) {
+		ret = -ENOMEM;
+		goto release;
+	}
+	ctx->magic_iova = rte_malloc_virt2iova(ctx->magic_buf);
+	if (ctx->magic_iova == RTE_BAD_IOVA) {
+		ret = -ENOMEM;
+		goto release;
+	}
+
+	return 0;
+
+release:
+	rte_free(ctx->desc_buf);
+	ctx->desc_buf = NULL;
+	rte_free(ctx->magic_buf);
+	ctx->magic_buf = NULL;
+	if (index == 0) {
+		/* only the owner closes the descriptor it just created */
+		close(ctx->event_fd);
+		ctx->event_fd = -1;
+	}
+	return ret;
+}
+
+/*
+ * Walk the feature header list starting at dev->addr and initialise a
+ * context for the NLB0 AFU and for each DMA block that is found, up to
+ * NUM_N3000_DMA of them.  Other features are only logged at debug level.
+ *
+ * @return 0 when the walk completes, -EINVAL if dev is NULL, -ENOENT
+ *         without private data, or the error of the failing context
+ *         initialiser
+ */
+static int n3000_afu_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	uint8_t *feature = NULL;
+	uint64_t hdr = 0;
+	uint64_t id_hi = 0;
+	uint64_t id_lo = 0;
+	uint64_t step = 0;
+	int rc = 0;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	feature = (uint8_t *)dev->addr;
+	for (;;) {
+		hdr = rte_read64(feature);
+		id_lo = rte_read64(feature + DFH_UUID_L_OFFSET);
+		id_hi = rte_read64(feature + DFH_UUID_H_OFFSET);
+
+		if ((DFH_TYPE(hdr) == DFH_TYPE_AFU) &&
+			(id_lo == N3000_NLB0_UUID_L) &&
+			(id_hi == N3000_NLB0_UUID_H)) {
+			AFU_MF_PMD_INFO("AFU NLB0 found @ %p", (void *)feature);
+			rc = nlb_afu_ctx_init(dev, feature);
+			if (rc != 0)
+				return rc;
+		} else if ((DFH_TYPE(hdr) == DFH_TYPE_BBB) &&
+			(id_lo == N3000_DMA_UUID_L) &&
+			(id_hi == N3000_DMA_UUID_H) &&
+			(priv->num_dma < NUM_N3000_DMA)) {
+			AFU_MF_PMD_INFO("AFU DMA%d found @ %p",
+				priv->num_dma, (void *)feature);
+			rc = dma_afu_ctx_init(dev, priv->num_dma, feature);
+			if (rc != 0)
+				return rc;
+			priv->num_dma++;
+		} else {
+			AFU_MF_PMD_DEBUG("DFH: type %"PRIu64
+				", uuid %016"PRIx64"%016"PRIx64,
+				DFH_TYPE(hdr), id_hi, id_lo);
+		}
+
+		/* stop on an invalid/zero link or on the end-of-list flag */
+		step = DFH_NEXT_OFFSET(hdr);
+		if (((step & 0xffff) == 0xffff) || (step == 0))
+			break;
+		if (DFH_EOL(hdr))
+			break;
+
+		feature += step;
+	}
+
+	return 0;
+}
+
+/*
+ * Driver init callback: make sure the device has zero-initialised private
+ * data, then discover and initialise the AFU contexts.
+ *
+ * @return result of n3000_afu_ctx_init(), -EINVAL if dev is NULL, -ENOMEM
+ *         if the private data cannot be allocated
+ */
+static int n3000_afu_init(struct afu_mf_rawdev *dev)
+{
+	void *priv = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* private data left over from an earlier init is reused as is */
+	if (dev->priv == NULL) {
+		priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0);
+		if (priv == NULL)
+			return -ENOMEM;
+		dev->priv = priv;
+	}
+
+	return n3000_afu_ctx_init(dev);
+}
+
+/*
+ * Driver config callback: validate an rte_pmd_afu_n3000_cfg and store the
+ * NLB or DMA part of it in the private data for a later test run.
+ *
+ * For DMA3 the length is rounded down to a multiple of 64 bytes; this
+ * updates the caller's structure before it is copied.
+ *
+ * @param dev          AFU device with allocated private data
+ * @param config       pointer to a struct rte_pmd_afu_n3000_cfg
+ * @param config_size  must equal sizeof(struct rte_pmd_afu_n3000_cfg)
+ * @return 0 on success, -EINVAL on invalid arguments or out-of-range
+ *         settings, -ENOENT without private data
+ */
+static int n3000_afu_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_n3000_cfg *cfg = NULL;
+	int i = 0;
+	uint64_t top = 0;
+
+	if (!dev || !config || !config_size)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
+		return -EINVAL;
+
+	cfg = (struct rte_pmd_afu_n3000_cfg *)config;
+	if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
+		/* only loopback mode is accepted */
+		if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
+			return -EINVAL;
+		if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
+			(cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
+			return -EINVAL;
+		if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
+			return -EINVAL;
+		if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
+			return -EINVAL;
+		if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
+			return -EINVAL;
+		if ((cfg->nlb_cfg.multi_cl != 1) &&
+			(cfg->nlb_cfg.multi_cl != 2) &&
+			(cfg->nlb_cfg.multi_cl != 4))
+			return -EINVAL;
+		if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
+			(cfg->nlb_cfg.begin > MAX_CACHE_LINES))
+			return -EINVAL;
+		if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
+			(cfg->nlb_cfg.end > MAX_CACHE_LINES))
+			return -EINVAL;
+		rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
+			sizeof(struct rte_pmd_afu_nlb_cfg));
+	} else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
+		if (cfg->dma_cfg.index >= NUM_N3000_DMA)
+			return -EINVAL;
+		i = cfg->dma_cfg.index;
+		if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
+			return -EINVAL;
+		if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
+			return -EINVAL;
+		/*
+		 * Widen before adding: both fields are 32 bits wide, and a
+		 * 32-bit sum could wrap to a small value and slip past the
+		 * upper bound check below.
+		 */
+		top = (uint64_t)cfg->dma_cfg.length + cfg->dma_cfg.offset;
+		if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
+			return -EINVAL;
+		if (i == 3) {  /* QDR connected to DMA3 */
+			if (cfg->dma_cfg.length & 0x3f) {
+				cfg->dma_cfg.length &= ~0x3f;
+				AFU_MF_PMD_INFO("Round size to %x for QDR",
+					cfg->dma_cfg.length);
+			}
+		}
+		rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
+			sizeof(struct rte_pmd_afu_dma_cfg));
+	} else {
+		AFU_MF_PMD_ERR("Invalid type of N3000 AFU");
+		return -EINVAL;
+	}
+
+	priv->cfg_type = cfg->type;
+	return 0;
+}
+
+/*
+ * Driver test callback: run the NLB or the DMA self test, depending on the
+ * configuration type stored by n3000_afu_config().
+ *
+ * @return result of the selected test, -EINVAL if dev is NULL or no valid
+ *         configuration has been stored, -ENOENT without private data
+ */
+static int n3000_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	switch (priv->cfg_type) {
+	case RTE_PMD_AFU_N3000_NLB:
+		AFU_MF_PMD_INFO("Test NLB");
+		return nlb_afu_test(dev);
+	case RTE_PMD_AFU_N3000_DMA:
+		AFU_MF_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
+		return dma_afu_test(dev);
+	default:
+		AFU_MF_PMD_ERR("Please configure AFU before test");
+		return -EINVAL;
+	}
+}
+
+/*
+ * Driver close callback: release the NLB and DMA contexts, then free the
+ * private data and clear the pointer to it.
+ *
+ * @return 0 on success, -EINVAL if dev is NULL
+ */
+static int n3000_afu_close(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* return values are ignored on purpose, close always proceeds */
+	(void)nlb_afu_ctx_release(dev);
+	(void)dma_afu_ctx_release(dev);
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+
+	return 0;
+}
+
+/*
+ * Driver dump callback: print the addresses held by the context that
+ * matches the stored configuration type.
+ *
+ * @param dev  AFU device with allocated private data
+ * @param f    output stream, stdout is used when NULL
+ * @return 0 on success, -EINVAL if dev is NULL or nothing is configured,
+ *         -ENOENT without private data
+ */
+static int n3000_afu_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct n3000_afu_priv *priv = NULL;
+	FILE *out = f;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	if (out == NULL)
+		out = stdout;
+
+	switch (priv->cfg_type) {
+	case RTE_PMD_AFU_N3000_NLB: {
+		struct nlb_afu_ctx *nlb = &priv->nlb_ctx;
+
+		fprintf(out, "addr:\t\t%p\n", (void *)nlb->addr);
+		fprintf(out, "dsm_ptr:\t%p\n", (void *)nlb->dsm_ptr);
+		fprintf(out, "dsm_iova:\t0x%"PRIx64"\n", nlb->dsm_iova);
+		fprintf(out, "src_ptr:\t%p\n", (void *)nlb->src_ptr);
+		fprintf(out, "src_iova:\t0x%"PRIx64"\n", nlb->src_iova);
+		fprintf(out, "dest_ptr:\t%p\n", (void *)nlb->dest_ptr);
+		fprintf(out, "dest_iova:\t0x%"PRIx64"\n", nlb->dest_iova);
+		fprintf(out, "status_ptr:\t%p\n", (void *)nlb->status_ptr);
+		break;
+	}
+	case RTE_PMD_AFU_N3000_DMA: {
+		struct dma_afu_ctx *dma = &priv->dma_ctx[priv->dma_cfg.index];
+
+		fprintf(out, "index:\t\t%d\n", dma->index);
+		fprintf(out, "addr:\t\t%p\n", (void *)dma->addr);
+		fprintf(out, "csr_addr:\t%p\n", (void *)dma->csr_addr);
+		fprintf(out, "desc_addr:\t%p\n", (void *)dma->desc_addr);
+		fprintf(out, "ase_ctrl_addr:\t%p\n", (void *)dma->ase_ctrl_addr);
+		fprintf(out, "ase_data_addr:\t%p\n", (void *)dma->ase_data_addr);
+		fprintf(out, "desc_buf:\t%p\n", (void *)dma->desc_buf);
+		fprintf(out, "magic_buf:\t%p\n", (void *)dma->magic_buf);
+		fprintf(out, "magic_iova:\t0x%"PRIx64"\n", dma->magic_iova);
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Driver reset callback: set the soft reset bit in the port control
+ * register, wait 100 us, then clear the bit again.
+ *
+ * @return 0 on success, -ENOENT if the port address cannot be resolved
+ */
+static int n3000_afu_reset(struct afu_mf_rawdev *dev)
+{
+	uint8_t *port = (uint8_t *)n3000_afu_get_port_addr(dev);
+	uint8_t *ctrl_reg = NULL;
+	uint64_t ctrl = 0;
+
+	if (port == NULL)
+		return -ENOENT;
+
+	ctrl_reg = port + PORT_CTRL_REG;
+	ctrl = rte_read64(ctrl_reg);
+
+	/* assert reset, keeping all other control bits as read */
+	rte_write64(ctrl | PORT_SOFT_RESET, ctrl_reg);
+	rte_delay_us(100);
+	/* release reset */
+	rte_write64(ctrl & ~PORT_SOFT_RESET, ctrl_reg);
+
+	return 0;
+}
+
+/*
+ * Callback table of the N3000 AFU driver.  The start and stop operations
+ * are not implemented and are left NULL.
+ */
+static struct afu_mf_ops n3000_afu_ops = {
+	.init = n3000_afu_init,
+	.config = n3000_afu_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = n3000_afu_test,
+	.close = n3000_afu_close,
+	.dump = n3000_afu_dump,
+	.reset = n3000_afu_reset
+};
+
+/* Driver descriptor: pairs the N3000 AFU UUID with its callback table. */
+struct afu_mf_drv n3000_afu_drv = {
+	.uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+	.ops = &n3000_afu_ops
+};
diff --git a/drivers/raw/afu_mf/n3000_afu.h b/drivers/raw/afu_mf/n3000_afu.h
new file mode 100644
index 0000000..4c740da
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _N3000_AFU_H_
+#define _N3000_AFU_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/*
+ * UUIDs split into low and high 64-bit halves.  N3000_AFU identifies the
+ * driver; NLB0 and DMA are matched against the feature headers found while
+ * walking the AFU's feature list.
+ */
+#define N3000_AFU_UUID_L  0xc000c9660d824272
+#define N3000_AFU_UUID_H  0x9aeffe5f84570612
+#define N3000_NLB0_UUID_L 0xf89e433683f9040b
+#define N3000_NLB0_UUID_H 0xd8424dc4a4a3c413
+#define N3000_DMA_UUID_L  0xa9149a35bace01ea
+#define N3000_DMA_UUID_H  0xef82def7f6ec40fc
+
+/* driver descriptor, defined in n3000_afu.c */
+extern struct afu_mf_drv n3000_afu_drv;
+
+/* number of DMA contexts kept per device */
+#define NUM_N3000_DMA  4
+/* upper bound on the number of MSI-X vectors set in one request */
+#define MAX_MSIX_VEC   7
+
+/* N3000 DFL definition */
+/* byte offsets of the UUID halves relative to a feature header */
+#define DFH_UUID_L_OFFSET  8
+#define DFH_UUID_H_OFFSET  16
+/* feature header fields: type [63:60], EOL [40], next [39:16], ID [11:0] */
+#define DFH_TYPE(hdr)  (((hdr) >> 60) & 0xf)
+#define DFH_TYPE_AFU  1
+#define DFH_TYPE_BBB  2
+#define DFH_TYPE_PRIVATE  3
+#define DFH_EOL(hdr)  (((hdr) >> 40) & 0x1)
+#define DFH_NEXT_OFFSET(hdr)  (((hdr) >> 16) & 0xffffff)
+#define DFH_FEATURE_ID(hdr)  ((hdr) & 0xfff)
+/* port attribute register of port n and its fields */
+#define PORT_ATTR_REG(n)  (((n) << 3) + 0x38)
+#define PORT_IMPLEMENTED(attr)  (((attr) >> 60) & 0x1)
+#define PORT_BAR(attr)  (((attr) >> 32) & 0x7)
+#define PORT_OFFSET(attr)  ((attr) & 0xffffff)
+/* port user interrupt feature and its capability register */
+#define PORT_FEATURE_UINT_ID  0x12
+#define PORT_UINT_CAP_REG  0x8
+/*
+ * Capability fields: first vector in bits [23:12], number of vectors in
+ * bits [11:0].  The count was previously extracted from bits [23:12] as
+ * well, which returned the first vector number instead of the count.
+ */
+#define PORT_VEC_START(cap)  (((cap) >> 12) & 0xfff)
+#define PORT_VEC_COUNT(cap)  ((cap) & 0xfff)
+/* port control register and its soft reset bit */
+#define PORT_CTRL_REG  0x38
+#define PORT_SOFT_RESET  (0x1 << 0)
+
+/* NLB registers definition */
+/* presumably byte offsets from the NLB feature base - confirm in nlb code */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+/* size of the DSM buffer and offset of the status block inside it */
+#define DSM_SIZE           0x200000
+#define DSM_STATUS         0x40
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+/* size of the NLB source and destination buffers */
+#define NLB_BUF_SIZE  0x400000
+/* alignment used when allocating the test buffers */
+#define TEST_MEM_ALIGN  1024
+
+/*
+ * 32-bit control value, accessible as a whole through csr or by bit-field.
+ * Presumably the layout of the CSR_CTL register - confirm against its users.
+ */
+struct nlb_csr_ctl {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t reset:1;
+			uint32_t start:1;
+			uint32_t force_completion:1;
+			uint32_t reserved:29;
+		};
+	};
+};
+
+/*
+ * 32-bit configuration value, accessible as a whole through csr or by
+ * bit-field.  Presumably the layout of the CSR_CFG register - confirm
+ * against its users.
+ */
+struct nlb_csr_cfg {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t wrthru_en:1;
+			uint32_t cont:1;
+			uint32_t mode:3;
+			uint32_t multicl_len:2;
+			uint32_t rsvd1:1;
+			uint32_t delay_en:1;
+			uint32_t rdsel:2;
+			uint32_t rsvd2:1;
+			uint32_t chsel:3;
+			uint32_t rsvd3:1;
+			uint32_t wrpush_i:1;
+			uint32_t wr_chsel:3;
+			uint32_t rsvd4:3;
+			uint32_t test_cfg:5;
+			uint32_t interrupt_on_error:1;
+			uint32_t interrupt_testmode:1;
+			uint32_t wrfence_chsel:2;
+		};
+	};
+};
+
+/*
+ * 64-bit status value split into write and read counters.  Presumably the
+ * layout of CSR_STATUS0 - confirm against its users.
+ */
+struct nlb_status0 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_writes;
+			uint32_t num_reads;
+		};
+	};
+};
+
+/*
+ * 64-bit status value split into pending write and pending read counters.
+ * Presumably the layout of CSR_STATUS1 - confirm against its users.
+ */
+struct nlb_status1 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_pend_writes;
+			uint32_t num_pend_reads;
+		};
+	};
+};
+
+/*
+ * Status block located DSM_STATUS bytes into the DSM buffer (see
+ * nlb_afu_ctx.status_ptr).
+ */
+struct nlb_dsm_status {
+	uint32_t test_complete;
+	uint32_t test_error;
+	uint64_t num_clocks;
+	uint32_t num_reads;
+	uint32_t num_writes;
+	uint32_t start_overhead;
+	uint32_t end_overhead;
+};
+
+/* DMA registers definition */
+/* byte offsets from the DMA feature base, see dma_afu_ctx_init() */
+#define DMA_CSR       0x40
+#define DMA_DESC      0x60
+#define DMA_ASE_CTRL  0x200
+#define DMA_ASE_DATA  0x1000
+
+/* ASE data window size and the marker for "no page selected yet" */
+#define DMA_ASE_WINDOW       4096
+#define DMA_ASE_WINDOW_MASK  ((uint64_t)(DMA_ASE_WINDOW - 1))
+#define INVALID_ASE_PAGE     0xffffffffffffffffULL
+
+/* magic value and address tags; usage is in the DMA code of n3000_afu.c */
+#define DMA_WF_MAGIC             0x5772745F53796E63ULL
+#define DMA_WF_MAGIC_ROM         0x1000000000000
+#define DMA_HOST_ADDR(addr)      ((addr) | 0x2000000000000)
+#define DMA_WF_HOST_ADDR(addr)   ((addr) | 0x3000000000000)
+
+/* number of entries in dma_afu_ctx.dma_buf / dma_iova */
+#define NUM_DMA_BUF   8
+#define HALF_DMA_BUF  (NUM_DMA_BUF / 2)
+
+#define DMA_MASK_32_BIT 0xFFFFFFFF
+
+/* masks matching the low bits of msgdma_status */
+#define DMA_CSR_BUSY           0x1
+#define DMA_DESC_BUFFER_EMPTY  0x2
+#define DMA_DESC_BUFFER_FULL   0x4
+
+/* alignment helpers for 4-byte, 8-byte and 64-byte boundaries */
+#define DWORD_BYTES 4
+#define IS_ALIGNED_DWORD(addr) (((addr) % DWORD_BYTES) == 0)
+
+#define QWORD_BYTES 8
+#define IS_ALIGNED_QWORD(addr) (((addr) % QWORD_BYTES) == 0)
+
+#define DMA_ALIGN_BYTES 64
+#define IS_DMA_ALIGNED(addr) (((addr) % DMA_ALIGN_BYTES) == 0)
+
+#define CCIP_ALIGN_BYTES (DMA_ALIGN_BYTES << 2)
+
+#define DMA_TIMEOUT_MSEC  5000
+
+/* size in bytes of dma_afu_ctx.magic_buf */
+#define MAGIC_BUF_SIZE  64
+#define ERR_CHECK_LIMIT  64
+
+/* note: evaluates its arguments more than once */
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+/* element count of a true array (not valid for pointers) */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* Direction of a DMA transfer, as accepted by dma_transfer_sync(). */
+typedef enum {
+	HOST_TO_FPGA = 0,
+	FPGA_TO_HOST,
+	FPGA_TO_FPGA,
+	FPGA_MAX_TRANSFER_TYPE,
+} fpga_dma_type;
+
+/*
+ * 32-bit control word of a DMA descriptor (last member of msgdma_ext_desc),
+ * accessible as a whole through csr or by bit-field.
+ */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t tx_channel:8;
+		uint32_t generate_sop:1;
+		uint32_t generate_eop:1;
+		uint32_t park_reads:1;
+		uint32_t park_writes:1;
+		uint32_t end_on_eop:1;
+		uint32_t reserved_1:1;
+		uint32_t transfer_irq_en:1;
+		uint32_t early_term_irq_en:1;
+		uint32_t trans_error_irq_en:8;
+		uint32_t early_done_en:1;
+		uint32_t reserved_2:6;
+		uint32_t go:1;
+	};
+} msgdma_desc_ctrl;
+
+/*
+ * Packed DMA descriptor.  Read and write addresses are each split into a
+ * 32-bit part and a 32-bit "_ext" part (presumably low and high halves of a
+ * 64-bit address - confirm in the descriptor setup code).
+ */
+typedef struct __rte_packed {
+	uint32_t rd_address;
+	uint32_t wr_address;
+	uint32_t len;
+	uint16_t seq_num;
+	uint8_t rd_burst_count;
+	uint8_t wr_burst_count;
+	uint16_t rd_stride;
+	uint16_t wr_stride;
+	uint32_t rd_address_ext;
+	uint32_t wr_address_ext;
+	msgdma_desc_ctrl control;
+} msgdma_ext_desc;
+
+/*
+ * 32-bit DMA status value (first member of msgdma_csr), accessible as a
+ * whole through csr or by bit-field.
+ */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t busy:1;
+		uint32_t desc_buf_empty:1;
+		uint32_t desc_buf_full:1;
+		uint32_t rsp_buf_empty:1;
+		uint32_t rsp_buf_full:1;
+		uint32_t stopped:1;
+		uint32_t resetting:1;
+		uint32_t stopped_on_error:1;
+		uint32_t stopped_on_early_term:1;
+		uint32_t irq:1;
+		uint32_t reserved:22;
+	};
+} msgdma_status;
+
+/*
+ * 32-bit DMA control value (second member of msgdma_csr), accessible as a
+ * whole through csr or by bit-field.  Six control bits are followed by 26
+ * reserved bits so that the bit-fields cover the full 32-bit register; the
+ * reserved field was previously declared 22 bits wide, leaving the fields
+ * four bits short of csr.
+ */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t stop_dispatcher:1;
+		uint32_t reset_dispatcher:1;
+		uint32_t stop_on_error:1;
+		uint32_t stopped_on_early_term:1;
+		uint32_t global_intr_en_mask:1;
+		uint32_t stop_descriptors:1;
+		uint32_t reserved:26;
+	};
+} msgdma_ctrl;
+
+/* 32-bit value holding the read and write fill levels, 16 bits each. */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rd_fill_level:16;
+		uint32_t wr_fill_level:16;
+	};
+} msgdma_fill_level;
+
+/* 32-bit value holding the response fill level in its low 16 bits. */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rsp_fill_level:16;
+		uint32_t reserved:16;
+	};
+} msgdma_rsp_level;
+
+/* 32-bit value holding the read and write sequence numbers, 16 bits each. */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rd_seq_num:16;
+		uint32_t wr_seq_num:16;
+	};
+} msgdma_seq_num;
+
+/*
+ * Packed layout of the DMA CSR block: five consecutive 32-bit values
+ * starting with status and control.
+ */
+typedef struct __rte_packed {
+	msgdma_status status;
+	msgdma_ctrl ctrl;
+	msgdma_fill_level fill_level;
+	msgdma_rsp_level rsp;
+	msgdma_seq_num seq_num;
+} msgdma_csr;
+
+/* address of the status / control member of the CSR block at csr */
+#define CSR_STATUS(csr)   (&(((msgdma_csr *)(csr))->status))
+#define CSR_CONTROL(csr)  (&(((msgdma_csr *)(csr))->ctrl))
+
+/* Runtime state of the NLB AFU, filled in by nlb_afu_ctx_init(). */
+struct nlb_afu_ctx {
+	uint8_t *addr;      /* MMIO address of the NLB feature */
+	uint8_t *dsm_ptr;   /* DSM buffer of DSM_SIZE bytes */
+	uint64_t dsm_iova;  /* IOVA of dsm_ptr */
+	uint8_t *src_ptr;   /* source buffer of NLB_BUF_SIZE bytes */
+	uint64_t src_iova;  /* IOVA of src_ptr */
+	uint8_t *dest_ptr;  /* destination buffer of NLB_BUF_SIZE bytes */
+	uint64_t dest_iova; /* IOVA of dest_ptr */
+	struct nlb_dsm_status *status_ptr; /* dsm_ptr + DSM_STATUS */
+};
+
+/*
+ * Runtime state of one DMA controller.  The address, size and descriptor
+ * fields are set by dma_afu_ctx_init(); pattern, verbose and dma_buf_size
+ * are taken from the DMA configuration when a test starts.
+ */
+struct dma_afu_ctx {
+	int index;              /* DMA controller index */
+	uint8_t *addr;          /* MMIO base address of the DMA feature */
+	uint8_t *csr_addr;      /* addr + DMA_CSR */
+	uint8_t *desc_addr;     /* addr + DMA_DESC */
+	uint8_t *ase_ctrl_addr; /* addr + DMA_ASE_CTRL */
+	uint8_t *ase_data_addr; /* addr + DMA_ASE_DATA */
+	uint64_t mem_size;      /* size of the memory behind this controller */
+	uint64_t cur_ase_page;  /* starts as INVALID_ASE_PAGE */
+	int event_fd;           /* eventfd created for DMA0 */
+	int verbose;            /* copied from rte_pmd_afu_dma_cfg.verbose */
+	int pattern;            /* copied from rte_pmd_afu_dma_cfg.pattern */
+	void *data_buf;         /* host test buffer used for transfers */
+	void *ref_buf;          /* presumably the reference data - confirm */
+	msgdma_ext_desc *desc_buf; /* one descriptor, DMA_ALIGN_BYTES aligned */
+	uint64_t *magic_buf;    /* buffer of MAGIC_BUF_SIZE bytes */
+	uint64_t magic_iova;    /* IOVA of magic_buf */
+	uint32_t dma_buf_size;  /* copied from rte_pmd_afu_dma_cfg.size */
+	uint64_t *dma_buf[NUM_DMA_BUF];  /* set up outside this header */
+	uint64_t dma_iova[NUM_DMA_BUF];  /* presumably IOVAs of dma_buf[] */
+};
+
+/* Private data of an N3000 AFU device, allocated by n3000_afu_init(). */
+struct n3000_afu_priv {
+	struct rte_pmd_afu_nlb_cfg nlb_cfg; /* last accepted NLB configuration */
+	struct rte_pmd_afu_dma_cfg dma_cfg; /* last accepted DMA configuration */
+	struct nlb_afu_ctx nlb_ctx;
+	struct dma_afu_ctx dma_ctx[NUM_N3000_DMA];
+	int num_dma;   /* number of DMA blocks discovered so far */
+	int cfg_type;  /* RTE_PMD_AFU_N3000_NLB/_DMA, 0 until configured */
+};
+
+#endif /* _N3000_AFU_H_ */
diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h
new file mode 100644
index 0000000..89d866a
--- /dev/null
+++ b/drivers/raw/afu_mf/rte_pmd_afu.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __RTE_PMD_AFU_H__
+#define __RTE_PMD_AFU_H__
+
+/**
+ * @file rte_pmd_afu.h
+ *
+ * AFU PMD specific definitions.
+ *
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/* values of rte_pmd_afu_n3000_cfg.type */
+#define RTE_PMD_AFU_N3000_NLB   1
+#define RTE_PMD_AFU_N3000_DMA   2
+
+/* values of the NLB "mode" setting */
+#define NLB_MODE_LPBK      0
+#define NLB_MODE_READ      1
+#define NLB_MODE_WRITE     2
+#define NLB_MODE_TRPUT     3
+
+/* values of the NLB read_vc, write_vc and wrfence_vc settings */
+#define NLB_VC_AUTO        0
+#define NLB_VC_VL0         1
+#define NLB_VC_VH0         2
+#define NLB_VC_VH1         3
+#define NLB_VC_RANDOM      4
+
+/* values of the NLB cache_policy setting */
+#define NLB_WRLINE_M       0
+#define NLB_WRLINE_I       1
+#define NLB_WRPUSH_I       2
+
+/* values of the NLB cache_hint setting */
+#define NLB_RDLINE_S       0
+#define NLB_RDLINE_I       1
+#define NLB_RDLINE_MIXED   2
+
+/* valid range of the NLB begin and end settings */
+#define MIN_CACHE_LINES   1
+#define MAX_CACHE_LINES   1024
+
+/* presumably the valid range of rte_pmd_afu_dma_cfg.size - confirm */
+#define MIN_DMA_BUF_SIZE  64
+#define MAX_DMA_BUF_SIZE  (1023 * 1024)
+
+/**
+ * NLB AFU configuration data structure.
+ *
+ * The ranges noted below are the ones enforced by the N3000 driver when the
+ * configuration is applied.
+ */
+struct rte_pmd_afu_nlb_cfg {
+	uint32_t mode;          /* only NLB_MODE_LPBK is accepted */
+	uint32_t begin;         /* MIN_CACHE_LINES..MAX_CACHE_LINES */
+	uint32_t end;           /* begin..MAX_CACHE_LINES */
+	uint32_t multi_cl;      /* 1, 2 or 4 */
+	uint32_t cont;
+	uint32_t timeout;
+	uint32_t cache_policy;  /* NLB_WRLINE_M..NLB_WRPUSH_I */
+	uint32_t cache_hint;    /* NLB_RDLINE_S..NLB_RDLINE_MIXED */
+	uint32_t read_vc;       /* NLB_VC_AUTO..NLB_VC_RANDOM */
+	uint32_t write_vc;      /* NLB_VC_AUTO..NLB_VC_RANDOM */
+	uint32_t wrfence_vc;    /* NLB_VC_AUTO..NLB_VC_VH1 */
+	uint32_t freq_mhz;
+};
+
+/**
+ * DMA AFU configuration data structure.
+ *
+ * The N3000 driver rejects a configuration whose offset + length exceeds
+ * the memory size of the selected controller, and rounds length down to a
+ * multiple of 64 bytes for controller 3.
+ */
+struct rte_pmd_afu_dma_cfg {
+	uint32_t index;     /* index of DMA controller */
+	uint32_t length;    /* total length of data to DMA */
+	uint32_t offset;    /* address offset of target memory */
+	uint32_t size;      /* size of transfer buffer */
+	uint32_t pattern;   /* data pattern to fill in test buffer */
+	uint32_t unaligned; /* use unaligned address or length in sweep test */
+	uint32_t verbose;   /* enable verbose error information in test */
+};
+
+/**
+ * N3000 AFU configuration data structure.
+ *
+ * The type field selects which member of the anonymous union is valid.
+ */
+struct rte_pmd_afu_n3000_cfg {
+	int type;   /* RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA */
+	union {
+		struct rte_pmd_afu_nlb_cfg nlb_cfg;
+		struct rte_pmd_afu_dma_cfg dma_cfg;
+	};
+};
+
+/**
+ * HE-LBK & HE-MEM-LBK AFU configuration data structure.
+ *
+ * Shares mode, begin, end, multi_cl, cont, timeout and freq_mhz with
+ * struct rte_pmd_afu_nlb_cfg; valid ranges are defined by the HE-LBK driver.
+ */
+struct rte_pmd_afu_he_lbk_cfg {
+	uint32_t mode;
+	uint32_t begin;
+	uint32_t end;
+	uint32_t multi_cl;
+	uint32_t cont;
+	uint32_t timeout;
+	uint32_t trput_interleave;
+	uint32_t freq_mhz;
+};
+
+/**
+ * HE-MEM-TG AFU configuration data structure.
+ *
+ * Holds a single setting, the traffic generator channel mask.
+ */
+struct rte_pmd_afu_he_mem_tg_cfg {
+	uint32_t channel_mask;   /* mask of traffic generator channel */
+};
+
+/**
+ * HE-HSSI AFU configuration data structure.
+ *
+ * Field semantics and valid ranges are defined by the HE-HSSI driver.
+ */
+struct rte_pmd_afu_he_hssi_cfg {
+	uint32_t port;
+	uint32_t timeout;
+	uint32_t num_packets;
+	uint32_t random_length;
+	uint32_t packet_length;
+	uint32_t random_payload;
+	uint32_t rnd_seed[3];
+	uint64_t src_addr;
+	uint64_t dest_addr;
+	int he_loopback;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PMD_AFU_H__ */
diff --git a/drivers/raw/afu_mf/version.map b/drivers/raw/afu_mf/version.map
new file mode 100644
index 0000000..c2e0723
--- /dev/null
+++ b/drivers/raw/afu_mf/version.map
@@ -0,0 +1,3 @@
+DPDK_22 {
+	local: *;
+};
diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build
index 05e7de1..c3627f7 100644
--- a/drivers/raw/meson.build
+++ b/drivers/raw/meson.build
@@ -6,6 +6,7 @@ if is_windows
 endif
 
 drivers = [
+        'afu_mf',
         'cnxk_bphy',
         'cnxk_gpio',
         'dpaa2_cmdif',
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4] raw/afu_mf: introduce AFU MF device driver
  2022-05-19  2:43   ` [PATCH v3] " Wei Huang
@ 2022-05-19  5:52     ` Wei Huang
  2022-05-26  6:51       ` Xu, Rosen
  2022-05-27  5:36       ` [PATCH v5 0/5] introduce afu_mf raw " Wei Huang
  0 siblings, 2 replies; 57+ messages in thread
From: Wei Huang @ 2022-05-19  5:52 UTC (permalink / raw)
  To: dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, rosen.xu, tianfei.zhang, qi.z.zhang, Wei Huang

Add afu_mf driver to manage various AFU (Acceleration Function Unit)
in FPGA.

Signed-off-by: Wei Huang <wei.huang@intel.com>
Acked-by: Tianfei Zhang <tianfei.zhang@intel.com>
---
v2: fix typo
---
v3: fix build error in FreeBSD13-64, UB2004-32 and UB2204-32
---
v4: fix coding style issue and build error in FreeBSD13-64
---
 drivers/raw/afu_mf/afu_mf_rawdev.c |  440 ++++++++
 drivers/raw/afu_mf/afu_mf_rawdev.h |   89 ++
 drivers/raw/afu_mf/he_hssi.c       |  369 +++++++
 drivers/raw/afu_mf/he_hssi.h       |  102 ++
 drivers/raw/afu_mf/he_lbk.c        |  427 ++++++++
 drivers/raw/afu_mf/he_lbk.h        |  121 +++
 drivers/raw/afu_mf/he_mem.c        |  181 ++++
 drivers/raw/afu_mf/he_mem.h        |   40 +
 drivers/raw/afu_mf/meson.build     |    8 +
 drivers/raw/afu_mf/n3000_afu.c     | 2005 ++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
 drivers/raw/afu_mf/rte_pmd_afu.h   |  134 +++
 drivers/raw/afu_mf/version.map     |    3 +
 drivers/raw/meson.build            |    1 +
 14 files changed, 4253 insertions(+)
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
 create mode 100644 drivers/raw/afu_mf/he_hssi.c
 create mode 100644 drivers/raw/afu_mf/he_hssi.h
 create mode 100644 drivers/raw/afu_mf/he_lbk.c
 create mode 100644 drivers/raw/afu_mf/he_lbk.h
 create mode 100644 drivers/raw/afu_mf/he_mem.c
 create mode 100644 drivers/raw/afu_mf/he_mem.h
 create mode 100644 drivers/raw/afu_mf/meson.build
 create mode 100644 drivers/raw/afu_mf/n3000_afu.c
 create mode 100644 drivers/raw/afu_mf/n3000_afu.h
 create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h
 create mode 100644 drivers/raw/afu_mf/version.map

diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c
new file mode 100644
index 0000000..f24c748
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
@@ -0,0 +1,440 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_rawdev_pmd.h>
+
+#include "rte_pmd_afu.h"
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+#include "he_lbk.h"
+#include "he_mem.h"
+#include "he_hssi.h"
+
+#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
+
+/* UUIDs of the supported AFUs; used as the id_table of afu_mf_pmd_drv */
+static const struct rte_afu_uuid afu_uuid_map[] = {
+	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
+	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+	{ HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+	{ HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+	{ 0, 0 /* sentinel */ }
+};
+
+/* NULL-terminated list of per-AFU drivers searched by afu_mf_ops_get() */
+static struct afu_mf_drv *afu_table[] = {
+	&n3000_afu_drv,
+	&he_lbk_drv,
+	&he_mem_lbk_drv,
+	&he_mem_tg_drv,
+	&he_hssi_drv,
+	NULL
+};
+
+/*
+ * Try to take the lock kept in the shared data of the device.
+ *
+ * Returns 0 when the lock was taken, -ENODEV when dev or its shared data is
+ * missing and -EBUSY when the lock is already held.
+ */
+static inline int afu_mf_trylock(struct afu_mf_rawdev *dev)
+{
+	int32_t x = 0;
+
+	if (!dev || !dev->shared)
+		return -ENODEV;
+
+	/* cheap check first: do not attempt the exchange on a held lock */
+	x = __atomic_load_n(&dev->shared->lock, __ATOMIC_RELAXED);
+	if (x != 0)
+		return -EBUSY;
+
+	/*
+	 * Use the strong compare-exchange (weak == 0): there is no retry loop
+	 * here, so a spurious failure of the weak form would report a free
+	 * lock as busy.
+	 */
+	if (!__atomic_compare_exchange_n(&dev->shared->lock, &x, 1,
+				0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+		return -EBUSY;
+
+	return 0;
+}
+
+/* Release the shared lock; a missing dev or shared area is silently ignored. */
+static inline void afu_mf_unlock(struct afu_mf_rawdev *dev)
+{
+	if (dev && dev->shared)
+		__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE);
+}
+
+/*
+ * rawdev dev_configure callback: hand the configuration over to the config
+ * operation of the AFU. Returns 0 when the AFU has no such operation and
+ * -ENODEV when the raw device has no private data.
+ */
+static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
+	rte_rawdev_obj_t config, size_t config_size)
+{
+	struct afu_mf_rawdev *dev;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return -ENODEV;
+
+	if (dev->ops == NULL || dev->ops->config == NULL)
+		return 0;
+
+	return dev->ops->config(dev, config, config_size);
+}
+
+/*
+ * rawdev dev_start callback: run the start operation of the AFU while
+ * holding the shared lock. Returns the trylock error when the lock cannot be
+ * taken, otherwise the result of the operation (0 when there is none).
+ */
+static int afu_mf_rawdev_start(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *dev;
+	int ret;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return -ENODEV;
+
+	ret = afu_mf_trylock(dev);
+	if (ret != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please start it later");
+		return ret;
+	}
+
+	/* ret is 0 here, so an AFU without a start operation reports success */
+	if (dev->ops != NULL && dev->ops->start != NULL)
+		ret = dev->ops->start(dev);
+
+	afu_mf_unlock(dev);
+
+	return ret;
+}
+
+/*
+ * rawdev dev_stop callback: run the stop operation of the AFU while holding
+ * the shared lock. Nothing is done when the lock cannot be taken.
+ */
+static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *dev;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return;
+
+	if (afu_mf_trylock(dev) != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please stop it later");
+		return;
+	}
+
+	/* this callback returns void, so the status of the operation is dropped */
+	if (dev->ops != NULL && dev->ops->stop != NULL)
+		(void)dev->ops->stop(dev);
+
+	afu_mf_unlock(dev);
+}
+
+/*
+ * rawdev dev_close callback: call the close operation of the AFU. The shared
+ * lock is not taken here. Returns 0 when the AFU has no close operation.
+ */
+static int afu_mf_rawdev_close(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *dev;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return -ENODEV;
+
+	if (dev->ops == NULL || dev->ops->close == NULL)
+		return 0;
+
+	return dev->ops->close(dev);
+}
+
+/*
+ * rawdev dev_reset callback: run the reset operation of the AFU while
+ * holding the shared lock. Returns the trylock error when the lock cannot be
+ * taken, otherwise the result of the operation (0 when there is none).
+ */
+static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *dev;
+	int ret;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return -ENODEV;
+
+	ret = afu_mf_trylock(dev);
+	if (ret != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please reset it later");
+		return ret;
+	}
+
+	if (dev->ops != NULL && dev->ops->reset != NULL)
+		ret = dev->ops->reset(dev);
+
+	afu_mf_unlock(dev);
+
+	return ret;
+}
+
+/*
+ * rawdev dev_selftest callback: run the test operation of the AFU behind
+ * dev_id while holding the shared lock.
+ *
+ * Returns -ENODEV for an invalid device id, -ENOENT when the raw device has
+ * no private data, the trylock error when the lock cannot be taken,
+ * otherwise the result of the test operation (0 when there is none).
+ */
+static int afu_mf_rawdev_selftest(uint16_t dev_id)
+{
+	struct afu_mf_rawdev *dev;
+	int ret;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	if (!rte_rawdev_pmd_is_valid_dev(dev_id))
+		return -ENODEV;
+
+	dev = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
+	if (dev == NULL)
+		return -ENOENT;
+
+	ret = afu_mf_trylock(dev);
+	if (ret != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please test it later");
+		return ret;
+	}
+
+	if (dev->ops != NULL && dev->ops->test != NULL)
+		ret = dev->ops->test(dev);
+
+	afu_mf_unlock(dev);
+
+	return ret;
+}
+
+/*
+ * rawdev dump callback: let the AFU print its state to f through its dump
+ * operation. Returns 0 when the AFU has no dump operation.
+ */
+static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f)
+{
+	struct afu_mf_rawdev *dev;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (dev == NULL)
+		return -ENODEV;
+
+	if (dev->ops == NULL || dev->ops->dump == NULL)
+		return 0;
+
+	return dev->ops->dump(dev, f);
+}
+
+/*
+ * rawdev operations of this driver. Only configure, start, stop, close,
+ * reset, dump and selftest are provided; every other callback is NULL.
+ */
+static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
+	.dev_info_get = NULL,
+	.dev_configure = afu_mf_rawdev_configure,
+	.dev_start = afu_mf_rawdev_start,
+	.dev_stop = afu_mf_rawdev_stop,
+	.dev_close = afu_mf_rawdev_close,
+	.dev_reset = afu_mf_rawdev_reset,
+
+	.queue_def_conf = NULL,
+	.queue_setup = NULL,
+	.queue_release = NULL,
+	.queue_count = NULL,
+
+	.attr_get = NULL,
+	.attr_set = NULL,
+
+	.enqueue_bufs = NULL,
+	.dequeue_bufs = NULL,
+
+	.dump = afu_mf_rawdev_dump,
+
+	.xstats_get = NULL,
+	.xstats_get_names = NULL,
+	.xstats_get_by_name = NULL,
+	.xstats_reset = NULL,
+
+	.firmware_status_get = NULL,
+	.firmware_version_get = NULL,
+	.firmware_load = NULL,
+	.firmware_unload = NULL,
+
+	.dev_selftest = afu_mf_rawdev_selftest,
+};
+
+/*
+ * Look up the memory zone called "name", reserving it on socket_id when it
+ * does not exist yet, and return its address through data. The lock inside
+ * the zone is cleared only when the zone was reserved by this call.
+ *
+ * Returns 0 on success, -EINVAL on a NULL argument and -ENOMEM when the zone
+ * can be neither found nor reserved.
+ */
+static int
+afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
+	int socket_id)
+{
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	const struct rte_memzone *mz;
+	struct afu_mf_shared *shared;
+	int created = 0;
+
+	if (name == NULL || data == NULL)
+		return -EINVAL;
+
+	/* name format is afu_?|??:??.? which is unique */
+	snprintf(mz_name, sizeof(mz_name), "%s", name);
+
+	mz = rte_memzone_lookup(mz_name);
+	if (mz == NULL) {
+		mz = rte_memzone_reserve(mz_name,
+				sizeof(struct afu_mf_shared),
+				socket_id, 0);
+		created = 1;
+		if (mz == NULL) {
+			AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
+				mz_name);
+			return -ENOMEM;
+		}
+	}
+
+	shared = (struct afu_mf_shared *)mz->addr;
+
+	/* a zone found by the lookup keeps whatever lock state it has */
+	if (created)
+		shared->lock = 0;
+
+	*data = shared;
+
+	return 0;
+}
+
+/*
+ * Build the raw device name "afu_<AFU device name>" into name.
+ *
+ * Returns 0 on success, -EINVAL on invalid arguments or when formatting
+ * fails, and -ENAMETOOLONG when the name does not fit into size bytes.
+ */
+static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev, char *name,
+	size_t size)
+{
+	int n = 0;
+
+	if (!afu_dev || !name || !size)
+		return -EINVAL;
+
+	n = snprintf(name, size, "afu_%s", afu_dev->device.name);
+	/* snprintf reports an output error with a negative value */
+	if (n < 0) {
+		AFU_MF_PMD_ERR("Failed to format name of AFU device!");
+		return -EINVAL;
+	}
+	/* compare as size_t so that size is not narrowed to int */
+	if ((size_t)n >= size) {
+		AFU_MF_PMD_ERR("Name of AFU device is too long!");
+		return -ENAMETOOLONG;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the operations of the driver in afu_table whose UUID equals afu_id,
+ * or NULL when afu_id is NULL or no driver matches.
+ */
+static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id)
+{
+	struct afu_mf_drv *drv;
+	int idx;
+
+	if (afu_id == NULL)
+		return NULL;
+
+	/* afu_table ends with a NULL entry */
+	for (idx = 0; afu_table[idx] != NULL; idx++) {
+		drv = afu_table[idx];
+		if (drv->uuid.uuid_low == afu_id->uuid_low &&
+			drv->uuid.uuid_high == afu_id->uuid_high)
+			return drv->ops;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create the raw device for afu_dev on socket_id.
+ *
+ * The raw device is allocated under the name built by
+ * afu_mf_rawdev_name_get(), bound to the operations matching the UUID of the
+ * AFU, initialized through the init operation (if any) and finally given its
+ * shared data. On any failure after the allocation the raw device is
+ * released again.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int socket_id)
+{
+	struct rte_rawdev *rawdev = NULL;
+	struct afu_mf_rawdev *dev = NULL;
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	int ret = 0;
+
+	if (!afu_dev)
+		return -EINVAL;
+
+	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (ret)
+		return ret;
+
+	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
+		name, socket_id);
+
+	/* Allocate device structure */
+	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct afu_mf_rawdev),
+				socket_id);
+	if (!rawdev) {
+		AFU_MF_PMD_ERR("Unable to allocate raw device");
+		return -ENOMEM;
+	}
+
+	rawdev->dev_ops = &afu_mf_rawdev_ops;
+	rawdev->device = &afu_dev->device;
+	rawdev->driver_name = afu_dev->driver->driver.name;
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (!dev) {
+		/* ret is still 0 here; without this the failure looks like success */
+		ret = -ENODEV;
+		goto cleanup;
+	}
+
+	dev->rawdev = rawdev;
+	dev->port = afu_dev->id.port;
+	dev->addr = afu_dev->mem_resource[0].addr;
+	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
+	if (dev->ops == NULL) {
+		AFU_MF_PMD_ERR("Unsupported AFU device");
+		/* same as above: report the failure instead of returning 0 */
+		ret = -ENOTSUP;
+		goto cleanup;
+	}
+
+	if (dev->ops->init) {
+		ret = (*dev->ops->init)(dev);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed to init %s", name);
+			goto cleanup;
+		}
+	}
+
+	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
+	if (ret)
+		goto cleanup;
+
+	return ret;
+
+cleanup:
+	rte_rawdev_pmd_release(rawdev);
+	return ret;
+}
+
+/*
+ * Release the raw device that was created for afu_dev.
+ *
+ * Returns -EINVAL when afu_dev is NULL or no raw device with the derived
+ * name exists, and the error of afu_mf_rawdev_name_get() when the name
+ * cannot be built. A failing release is only logged.
+ */
+static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev)
+{
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	struct rte_rawdev *rawdev;
+	int ret;
+
+	if (afu_dev == NULL)
+		return -EINVAL;
+
+	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (ret != 0)
+		return ret;
+
+	AFU_MF_PMD_INFO("Destroy raw device %s", name);
+
+	rawdev = rte_rawdev_pmd_get_named_dev(name);
+	if (rawdev == NULL) {
+		AFU_MF_PMD_ERR("Raw device %s not found", name);
+		return -EINVAL;
+	}
+
+	/* rte_rawdev_close is called by pmd_release */
+	if (rte_rawdev_pmd_release(rawdev) != 0)
+		AFU_MF_PMD_DEBUG("Device cleanup failed");
+
+	/* the removal reports success even when the release failed */
+	return 0;
+}
+
+/* AFU driver probe callback: create the raw device on the socket returned by rte_socket_id() */
+static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev)
+{
+	AFU_MF_PMD_FUNC_TRACE();
+	return afu_mf_rawdev_create(afu_dev, rte_socket_id());
+}
+
+/* AFU driver remove callback: destroy the raw device created for afu_dev */
+static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev)
+{
+	AFU_MF_PMD_FUNC_TRACE();
+	return afu_mf_rawdev_destroy(afu_dev);
+}
+
+/* AFU driver registered below through RTE_PMD_REGISTER_AFU() */
+static struct rte_afu_driver afu_mf_pmd_drv = {
+	.id_table = afu_uuid_map,
+	.probe = afu_mf_rawdev_probe,
+	.remove = afu_mf_rawdev_remove
+};
+
+RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME, afu_mf_pmd_drv);
+RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);
diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h b/drivers/raw/afu_mf/afu_mf_rawdev.h
new file mode 100644
index 0000000..5a66f6c
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __AFU_MF_RAWDEV_H__
+#define __AFU_MF_RAWDEV_H__
+
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+extern int afu_mf_pmd_logtype;
+
+/*
+ * Log helpers of the driver. AFU_MF_PMD_LOG() prefixes each message with the
+ * name of the calling function and appends a newline, so format strings
+ * passed to these macros must not end with "\n".
+ */
+#define AFU_MF_PMD_LOG(level, fmt, args...) \
+	rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n", \
+		__func__, ##args)
+
+/* DEBUG level marker logged on function entry */
+#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG, ">>")
+
+#define AFU_MF_PMD_DEBUG(fmt, args...) \
+	AFU_MF_PMD_LOG(DEBUG, fmt, ## args)
+#define AFU_MF_PMD_INFO(fmt, args...) \
+	AFU_MF_PMD_LOG(INFO, fmt, ## args)
+#define AFU_MF_PMD_ERR(fmt, args...) \
+	AFU_MF_PMD_LOG(ERR, fmt, ## args)
+#define AFU_MF_PMD_WARN(fmt, args...) \
+	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
+
+#define CLS_TO_SIZE(n)  ((n) << 6)  /* size in bytes of n 64-byte cache lines */
+#define SIZE_TO_CLS(s)  ((s) >> 6)  /* number of 64-byte cache lines in s bytes */
+#define MHZ(f)  ((f) * 1000000)  /* convert a frequency in MHz to Hz */
+
+/*
+ * Poll *addr until cond becomes true or the timeout expires.
+ *
+ * On every round *addr is loaded into val and cond, an expression that is
+ * expected to refer to val, is evaluated; between rounds the macro waits
+ * invl milliseconds. timeout is the total waiting budget in milliseconds.
+ *
+ * Evaluates to 0 when cond holds and to 1 when the timeout expired. Note
+ * that cond is evaluated more than once, so it must not have side effects.
+ * This is a GCC statement expression.
+ */
+#define dsm_poll_timeout(addr, val, cond, invl, timeout) \
+({                                                       \
+	uint64_t __wait = 0;                                 \
+	uint64_t __invl = (invl);                            \
+	uint64_t __timeout = (timeout);                      \
+	for (; __wait <= __timeout; __wait += __invl) {      \
+		(val) = *(addr);                                 \
+		if (cond)                                        \
+			break;                                       \
+		rte_delay_ms(__invl);                            \
+	}                                                    \
+	(cond) ? 0 : 1;                                      \
+})
+
+struct afu_mf_rawdev;
+
+/*
+ * Operations implemented by an AFU specific driver. Any member may be NULL;
+ * the rawdev callbacks skip a missing operation.
+ */
+struct afu_mf_ops {
+	int (*init)(struct afu_mf_rawdev *dev);  /* called when the raw device is created */
+	int (*config)(struct afu_mf_rawdev *dev, void *config,
+		size_t config_size);
+	int (*start)(struct afu_mf_rawdev *dev);
+	int (*stop)(struct afu_mf_rawdev *dev);
+	int (*test)(struct afu_mf_rawdev *dev);  /* run by the rawdev selftest */
+	int (*close)(struct afu_mf_rawdev *dev);
+	int (*reset)(struct afu_mf_rawdev *dev);
+	int (*dump)(struct afu_mf_rawdev *dev, FILE *f);
+};
+
+/* Binds the UUID of an AFU to the operations that drive it */
+struct afu_mf_drv {
+	struct rte_afu_uuid uuid;
+	struct afu_mf_ops *ops;
+};
+
+/* Data kept in a memory zone named after the raw device */
+struct afu_mf_shared {
+	int32_t lock;  /* 0 when free, 1 when held; accessed with atomic builtins */
+};
+
+/* Private data of an AFU raw device */
+struct afu_mf_rawdev {
+	struct rte_rawdev *rawdev;  /* points to the parent raw device */
+	struct afu_mf_shared *shared;  /* shared data for multi-process */
+	struct afu_mf_ops *ops;  /* device operation functions */
+	int port;  /* index of the port the AFU is attached to */
+	void *addr;  /* base address of AFU registers */
+	void *priv;  /* private driver data */
+};
+
+/* Return the driver private data of rawdev, or NULL when rawdev is NULL */
+static inline struct afu_mf_rawdev *
+afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev)
+{
+	if (rawdev == NULL)
+		return NULL;
+
+	return (struct afu_mf_rawdev *)rawdev->dev_private;
+}
+
+#endif /* __AFU_MF_RAWDEV_H__ */
diff --git a/drivers/raw/afu_mf/he_hssi.c b/drivers/raw/afu_mf/he_hssi.c
new file mode 100644
index 0000000..bedafbd
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_hssi.h"
+
+/*
+ * Write value to the indirect register addr through the traffic control
+ * mailbox.
+ *
+ * The value is placed in TRAFFIC_CTRL_DATA and a write command is posted to
+ * TRAFFIC_CTRL_CMD. The function then polls until the acknowledge bit reads
+ * as set, and afterwards writes the acknowledge bit back and polls until it
+ * reads as clear. Each polling phase gives up after MAILBOX_TIMEOUT_MS.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL and -ETIMEDOUT when one of
+ * the polling phases expires.
+ */
+static int he_hssi_indirect_write(struct he_hssi_ctx *ctx, uint32_t addr,
+	uint32_t value)
+{
+	struct traffic_ctrl_cmd cmd;
+	struct traffic_ctrl_data data;
+	uint32_t i = 0;
+
+	AFU_MF_PMD_DEBUG("Indirect write 0x%x, value 0x%08x", addr, value);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/*
+	 * Clear the whole 64-bit image first: only write_data is set below
+	 * and the other half must not carry uninitialized stack content.
+	 */
+	data.csr = 0;
+	data.write_data = value;
+	rte_write64(data.csr, ctx->addr + TRAFFIC_CTRL_DATA);
+
+	cmd.csr = 0;
+	cmd.write_cmd = 1;
+	cmd.afu_cmd_addr = addr;
+	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+	/* wait for the acknowledge bit to be set */
+	while (i < MAILBOX_TIMEOUT_MS) {
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	/* write the acknowledge bit back and wait until it reads as clear */
+	i = 0;
+	cmd.csr = 0;
+	while (i < MAILBOX_TIMEOUT_MS) {
+		cmd.ack_trans = 1;
+		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (!cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/*
+ * Read the indirect register addr through the traffic control mailbox and
+ * store the result in *value.
+ *
+ * A read command is posted to TRAFFIC_CTRL_CMD; once the acknowledge bit
+ * reads as set, the data is taken from TRAFFIC_CTRL_DATA. Afterwards the
+ * acknowledge bit is written back until it reads as clear. Each polling
+ * phase gives up after MAILBOX_TIMEOUT_MS.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL and -ETIMEDOUT when one of
+ * the polling phases expires. value is not checked for NULL.
+ */
+static int he_hssi_indirect_read(struct he_hssi_ctx *ctx, uint32_t addr,
+	uint32_t *value)
+{
+	struct traffic_ctrl_cmd cmd;
+	struct traffic_ctrl_data data;
+	uint32_t i = 0;
+
+	if (!ctx)
+		return -EINVAL;
+
+	cmd.csr = 0;
+	cmd.read_cmd = 1;
+	cmd.afu_cmd_addr = addr;
+	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+	/* wait for the acknowledge bit, then fetch the data */
+	while (i < MAILBOX_TIMEOUT_MS) {
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (cmd.ack_trans) {
+			data.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_DATA);
+			*value = data.read_data;
+			break;
+		}
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	/* write the acknowledge bit back and wait until it reads as clear */
+	i = 0;
+	cmd.csr = 0;
+	while (i < MAILBOX_TIMEOUT_MS) {
+		cmd.ack_trans = 1;
+		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (!cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	AFU_MF_PMD_DEBUG("Indirect read 0x%x, value 0x%08x", addr, *value);
+	return 0;
+}
+
+/*
+ * Print the packet and byte counters, the AVST rx error flags and the
+ * loopback FIFO status to stdout. The report stops silently at the first
+ * register that cannot be read.
+ */
+static void he_hssi_report(struct he_hssi_ctx *ctx)
+{
+	uint32_t val = 0;
+	uint64_t v64 = 0;
+	int ret = 0;
+
+	ret = he_hssi_indirect_read(ctx, TM_PKT_GOOD, &val);
+	if (ret)
+		return;
+	printf("Number of good packets received: %u\n", val);
+
+	ret = he_hssi_indirect_read(ctx, TM_PKT_BAD, &val);
+	if (ret)
+		return;
+	printf("Number of bad packets received: %u\n", val);
+
+	/* the byte counter is 64 bits wide: CNT1 is the high word, CNT0 the low */
+	ret = he_hssi_indirect_read(ctx, TM_BYTE_CNT1, &val);
+	if (ret)
+		return;
+	v64 = val;
+	ret = he_hssi_indirect_read(ctx, TM_BYTE_CNT0, &val);
+	if (ret)
+		return;
+	v64 = (v64 << 32) | val;
+	printf("Number of bytes received: %"PRIu64"\n", v64);
+
+	ret = he_hssi_indirect_read(ctx, TM_AVST_RX_ERR, &val);
+	if (ret)
+		return;
+	/* the individual error flags are reported only when ERR_VALID is set */
+	if (val & ERR_VALID) {
+		printf("AVST rx error:");
+		if (val & OVERFLOW_ERR)
+			printf(" overflow");
+		if (val & LENGTH_ERR)
+			printf(" length");
+		if (val & OVERSIZE_ERR)
+			printf(" oversize");
+		if (val & UNDERSIZE_ERR)
+			printf(" undersize");
+		if (val & MAC_CRC_ERR)
+			printf(" crc");
+		if (val & PHY_ERR)
+			printf(" phy");
+		printf("\n");
+	}
+
+	ret = he_hssi_indirect_read(ctx, LOOPBACK_FIFO_STATUS, &val);
+	if (ret)
+		return;
+	if (val & (ALMOST_EMPTY | ALMOST_FULL)) {
+		printf("FIFO status:");
+		if (val & ALMOST_EMPTY)
+			printf(" almost empty");
+		if (val & ALMOST_FULL)
+			printf(" almost full");
+		printf("\n");
+	}
+}
+
+/*
+ * Run the HE-HSSI test described by the stored configuration.
+ *
+ * The transfer is stopped and the configured port is selected. When
+ * he_loopback is not negative the function only enables or disables the
+ * loopback and returns. Otherwise the traffic generator is programmed
+ * (packet count and length, MAC addresses, length/payload type, random
+ * seeds), started, and polled once per second for up to cfg->timeout rounds
+ * until the number of transferred packets equals the requested count.
+ * The counters are printed with he_hssi_report() at the end.
+ *
+ * Returns 0 or the error of the failing mailbox access, -EINVAL when dev is
+ * NULL and -ENOENT when the device has no private data.
+ */
+static int he_hssi_test(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
+	struct he_hssi_ctx *ctx = NULL;
+	struct traffic_ctrl_ch_sel sel;
+	uint32_t val = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_hssi_cfg;
+	ctx = &priv->he_hssi_ctx;
+
+	ret = he_hssi_indirect_write(ctx, TG_STOP_XFR, 0);
+	if (ret)
+		return ret;
+
+	/* clear the whole register image so the reserved bits are written as 0 */
+	sel.csr = 0;
+	sel.channel_sel = cfg->port;
+	rte_write64(sel.csr, ctx->addr + TRAFFIC_CTRL_CH_SEL);
+
+	if (cfg->he_loopback >= 0) {
+		val = cfg->he_loopback ? 1 : 0;
+		AFU_MF_PMD_INFO("%s HE loopback on port %u",
+			val ? "Enable" : "Disable", cfg->port);
+		return he_hssi_indirect_write(ctx, LOOPBACK_EN, val);
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_NUM_PKT, cfg->num_packets);
+	if (ret)
+		return ret;
+
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN, cfg->packet_length);
+	if (ret)
+		return ret;
+
+	/* MAC addresses are split into a 32-bit low and a 16-bit high part */
+	val = cfg->src_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->src_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->dest_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->dest_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_length ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN_TYPE, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_payload ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_DATA_PATTERN, val);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < 3; i++) {
+		ret = he_hssi_indirect_write(ctx, TG_RANDOM_SEED(i),
+			cfg->rnd_seed[i]);
+		if (ret)
+			return ret;
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_START_XFR, 1);
+	if (ret)
+		return ret;
+
+	/*
+	 * Restart the counter: it still holds the index of the seed loop,
+	 * which would shorten the configured timeout by three rounds.
+	 */
+	i = 0;
+	while (i++ < cfg->timeout) {
+		ret = he_hssi_indirect_read(ctx, TG_PKT_XFRD, &val);
+		if (ret)
+			break;
+		if (val == cfg->num_packets)
+			break;
+		sleep(1);
+	}
+
+	he_hssi_report(ctx);
+
+	return ret;
+}
+
+/*
+ * Allocate the private data of the device, unless it already has some, and
+ * record the register base address in the context.
+ * Returns 0, -EINVAL when dev is NULL or -ENOMEM when the allocation fails.
+ */
+static int he_hssi_init(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *priv;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* private data that is already attached to the device is reused */
+	if (dev->priv == NULL) {
+		dev->priv = rte_zmalloc(NULL, sizeof(struct he_hssi_priv), 0);
+		if (dev->priv == NULL)
+			return -ENOMEM;
+	}
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	priv->he_hssi_ctx.addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+/*
+ * Validate a struct rte_pmd_afu_he_hssi_cfg and copy it into the private
+ * data of the device.
+ *
+ * Returns 0 on success, -ENOENT when the device has no private data and
+ * -EINVAL for NULL arguments, a config_size that differs from the size of
+ * the structure or a port that is not below NUM_HE_HSSI_PORTS.
+ */
+static int he_hssi_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct rte_pmd_afu_he_hssi_cfg *cfg;
+	struct he_hssi_priv *priv;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_hssi_cfg))
+		return -EINVAL;
+
+	cfg = (struct rte_pmd_afu_he_hssi_cfg *)config;
+	if (cfg->port >= NUM_HE_HSSI_PORTS)
+		return -EINVAL;
+
+	rte_memcpy(&priv->he_hssi_cfg, cfg, sizeof(priv->he_hssi_cfg));
+
+	return 0;
+}
+
+/* Free the private data of the device; returns -EINVAL when dev is NULL */
+static int he_hssi_close(struct afu_mf_rawdev *dev)
+{
+	if (!dev)
+		return -EINVAL;
+
+	rte_free(dev->priv);
+	dev->priv = NULL;  /* lets a later init allocate fresh private data */
+
+	return 0;
+}
+
+/*
+ * Print the register base address of the device to f, or to stdout when f
+ * is NULL. Returns 0, -EINVAL when dev is NULL or -ENOENT when the device
+ * has no private data.
+ */
+static int he_hssi_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_hssi_priv *priv;
+	FILE *out;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+
+	fprintf(out, "addr:\t\t%p\n", (void *)priv->he_hssi_ctx.addr);
+
+	return 0;
+}
+
+/* Operations of the HE-HSSI AFU; start, stop and reset are not implemented */
+static struct afu_mf_ops he_hssi_ops = {
+	.init = he_hssi_init,
+	.config = he_hssi_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_hssi_test,
+	.close = he_hssi_close,
+	.dump = he_hssi_dump,
+	.reset = NULL
+};
+
+/* Driver entry binding the HE-HSSI UUID to he_hssi_ops */
+struct afu_mf_drv he_hssi_drv = {
+	.uuid = { HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+	.ops = &he_hssi_ops
+};
diff --git a/drivers/raw/afu_mf/he_hssi.h b/drivers/raw/afu_mf/he_hssi.h
new file mode 100644
index 0000000..f8b9623
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_HSSI_H_
+#define _HE_HSSI_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define HE_HSSI_UUID_L    0xbb370242ac130002
+#define HE_HSSI_UUID_H    0x823c334c98bf11ea
+#define NUM_HE_HSSI_PORTS 8
+
+extern struct afu_mf_drv he_hssi_drv;
+
+/* HE-HSSI registers definition */
+#define TRAFFIC_CTRL_CMD    0x30
+#define TRAFFIC_CTRL_DATA   0x38
+#define TRAFFIC_CTRL_CH_SEL 0x40
+#define AFU_SCRATCHPAD      0x48
+
+#define TG_NUM_PKT        0x3c00
+#define TG_PKT_LEN_TYPE   0x3c01
+#define TG_DATA_PATTERN   0x3c02
+#define TG_START_XFR      0x3c03
+#define TG_STOP_XFR       0x3c04
+#define TG_SRC_MAC_L      0x3c05
+#define TG_SRC_MAC_H      0x3c06
+#define TG_DST_MAC_L      0x3c07
+#define TG_DST_MAC_H      0x3c08
+#define TG_PKT_XFRD       0x3c09
+#define TG_RANDOM_SEED(n) (0x3c0a + (n))
+#define TG_PKT_LEN        0x3c0d
+
+#define TM_NUM_PKT        0x3d00
+#define TM_PKT_GOOD       0x3d01
+#define TM_PKT_BAD        0x3d02
+#define TM_BYTE_CNT0      0x3d03
+#define TM_BYTE_CNT1      0x3d04
+#define TM_AVST_RX_ERR    0x3d07
+#define   OVERFLOW_ERR    (1 << 9)
+#define   LENGTH_ERR      (1 << 8)
+#define   OVERSIZE_ERR    (1 << 7)
+#define   UNDERSIZE_ERR   (1 << 6)
+#define   MAC_CRC_ERR     (1 << 5)
+#define   PHY_ERR         (1 << 4)
+#define   ERR_VALID       (1 << 3)
+
+#define LOOPBACK_EN          0x3e00
+#define LOOPBACK_FIFO_STATUS 0x3e01
+#define   ALMOST_EMPTY    (1 << 1)
+#define   ALMOST_FULL     (1 << 0)
+
+#define MAILBOX_TIMEOUT_MS       100
+#define MAILBOX_POLL_INTERVAL_MS 10
+
+/* 64-bit image of the register at TRAFFIC_CTRL_CMD */
+struct traffic_ctrl_cmd {
+	union {
+		uint64_t csr;  /* whole register, as read from or written to the device */
+		struct {
+			uint32_t read_cmd:1;
+			uint32_t write_cmd:1;
+			uint32_t ack_trans:1;  /* polled by the indirect read/write helpers */
+			uint32_t rsvd1:29;
+			uint32_t afu_cmd_addr:16;  /* address of the indirect register */
+			uint32_t rsvd2:16;
+		};
+	};
+};
+
+/* 64-bit image of the register at TRAFFIC_CTRL_DATA */
+struct traffic_ctrl_data {
+	union {
+		uint64_t csr;  /* whole register, as read from or written to the device */
+		struct {
+			uint32_t read_data;  /* result of an indirect read */
+			uint32_t write_data;  /* value of an indirect write */
+		};
+	};
+};
+
+/* 64-bit image of the register at TRAFFIC_CTRL_CH_SEL */
+struct traffic_ctrl_ch_sel {
+	union {
+		uint64_t csr;  /* whole register, as written to the device */
+		struct {
+			uint32_t channel_sel:3;  /* set from the configured port */
+			uint32_t rsvd1:29;
+			uint32_t rsvd2;
+		};
+	};
+};
+
+/* Runtime context of an HE-HSSI device */
+struct he_hssi_ctx {
+	uint8_t *addr;  /* register base address, copied from the raw device */
+};
+
+/* Private data of an HE-HSSI device: last accepted configuration and context */
+struct he_hssi_priv {
+	struct rte_pmd_afu_he_hssi_cfg he_hssi_cfg;
+	struct he_hssi_ctx he_hssi_ctx;
+};
+
+#endif /* _HE_HSSI_H_ */
diff --git a/drivers/raw/afu_mf/he_lbk.c b/drivers/raw/afu_mf/he_lbk.c
new file mode 100644
index 0000000..8735647
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.c
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_lbk.h"
+
+/*
+ * Build the configuration register value from the stored configuration and
+ * write it to CSR_CFG.
+ * Returns 0, -EINVAL when dev is NULL or -ENOENT when the device has no
+ * private data.
+ */
+static int he_lbk_afu_config(struct afu_mf_rawdev *dev)
+{
+	struct rte_pmd_afu_he_lbk_cfg *cfg;
+	struct he_lbk_priv *priv;
+	struct he_lbk_csr_cfg reg;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	cfg = &priv->he_lbk_cfg;
+
+	reg.csr = 0;
+	if (cfg->cont)
+		reg.cont = 1;
+	reg.mode = cfg->mode;
+	reg.trput_interleave = cfg->trput_interleave;
+
+	/* a length of 4 cache lines is encoded as 2, other lengths as n - 1 */
+	if (cfg->multi_cl != 4)
+		reg.multicl_len = cfg->multi_cl - 1;
+	else
+		reg.multicl_len = 2;
+
+	AFU_MF_PMD_DEBUG("cfg: 0x%08x", reg.csr);
+	rte_write32(reg.csr, priv->he_lbk_ctx.addr + CSR_CFG);
+
+	return 0;
+}
+
+/*
+ * Print the result of one test round that transferred cl cache lines: read
+ * and write counts, pending requests, clock ticks and the read and write
+ * bandwidth derived from them. Does nothing when dev or its private data is
+ * missing.
+ */
+static void he_lbk_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	struct he_lbk_dsm_status *stat = NULL;
+	struct he_lbk_status0 stat0;
+	struct he_lbk_status1 stat1;
+	uint64_t swtest_msg = 0;
+	uint64_t ticks = 0;
+	uint64_t info = 0;
+	double num, rd_bw, wr_bw;
+
+	if (!dev || !dev->priv)
+		return;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+
+	stat = ctx->status_ptr;
+
+	swtest_msg = rte_read64(ctx->addr + CSR_SWTEST_MSG);
+	stat0.csr = rte_read64(ctx->addr + CSR_STATUS0);
+	stat1.csr = rte_read64(ctx->addr + CSR_STATUS1);
+
+	/* in continuous mode only the start overhead is subtracted */
+	if (cfg->cont)
+		ticks = stat->num_clocks - stat->start_overhead;
+	else
+		ticks = stat->num_clocks -
+			(stat->start_overhead + stat->end_overhead);
+
+	/*
+	 * No frequency configured: take it from the low 16 bits of
+	 * CSR_HE_INFO0 and fall back to 350 MHz when that is 0 as well.
+	 * The value is stored back into the configuration.
+	 */
+	if (cfg->freq_mhz == 0) {
+		info = rte_read64(ctx->addr + CSR_HE_INFO0);
+		AFU_MF_PMD_INFO("API version: %"PRIx64, info >> 16);
+		cfg->freq_mhz = info & 0xffff;
+		if (cfg->freq_mhz == 0) {
+			AFU_MF_PMD_INFO("Frequency of AFU clock is unknown."
+				" Assuming 350 MHz.");
+			cfg->freq_mhz = 350;
+		}
+	}
+
+	/* bytes per second = count * cache line size * clock rate / ticks */
+	num = (double)stat0.num_reads;
+	rd_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+	num = (double)stat0.num_writes;
+	wr_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+
+	printf("Cachelines  Read_Count Write_Count Pend_Read Pend_Write "
+		"Clocks@%uMHz   Rd_Bandwidth   Wr_Bandwidth\n",
+		cfg->freq_mhz);
+	printf("%10u  %10u %10u %10u %10u  %12"PRIu64
+		"   %7.3f GB/s   %7.3f GB/s\n",
+		cl, stat0.num_reads, stat0.num_writes,
+		stat1.num_pend_reads, stat1.num_pend_writes,
+		ticks, rd_bw / 1e9, wr_bw / 1e9);
+	printf("Test Message: 0x%"PRIx64"\n", swtest_msg);
+}
+
+/*
+ * Run the loopback test described by the stored configuration.
+ *
+ * The AFU is reset, the source, destination and DSM addresses are
+ * programmed, the configuration register is written and the source buffer
+ * is filled with a counting pattern. Then one round is run for every
+ * transfer size from cfg->begin to cfg->end cache lines in steps of
+ * cfg->multi_cl. A round starts the AFU and waits for the test_complete flag
+ * in the DSM; in continuous mode completion is forced after cfg->timeout
+ * seconds. Each round is reported with he_lbk_report() and, in
+ * NLB_MODE_LPBK, the destination buffer is compared with the pattern.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data, the error of he_lbk_afu_config(), -ETIMEDOUT when a
+ * round does not complete (the test stops there) and -EIO when a data
+ * mismatch was found (the remaining rounds are still run).
+ */
+static int he_lbk_test(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	struct he_lbk_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int timed_out = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+
+	/* write 0 to the control register, wait 1 ms, then set the reset bit */
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	rte_delay_us(1000);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	/* initialize DMA addresses */
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
+
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
+
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	rte_write32(SIZE_TO_CLS(ctx->dsm_iova), ctx->addr + CSR_AFU_DSM_BASEL);
+	rte_write32(SIZE_TO_CLS(ctx->dsm_iova) >> 32,
+		ctx->addr + CSR_AFU_DSM_BASEH);
+
+	ret = he_lbk_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CLS_TO_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		rte_delay_us(1000);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		rte_write32(cl - 1, ctx->addr + CSR_NUM_LINES);
+
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			/* continuous mode runs until completion is forced */
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			timed_out = dsm_poll_timeout(
+				&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (timed_out) {
+				printf("DSM poll timeout\n");
+				ret = -ETIMEDOUT;
+				goto end;
+			}
+		} else {
+			timed_out = dsm_poll_timeout(
+				&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (timed_out) {
+				printf("DSM poll timeout\n");
+				ret = -ETIMEDOUT;
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		he_lbk_report(dev, cl);
+
+		/* wait up to about 100 ms for CSR_STATUS1 to read as 0 */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		if (cfg->mode == NLB_MODE_LPBK) {
+			ptr = (uint32_t *)ctx->dest_ptr;
+			j = CLS_TO_SIZE(cl) >> 2;
+			for (i = 0; i < j; i++) {
+				if (*ptr++ != i) {
+					AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+					/* remember the failure, keep testing */
+					ret = -EIO;
+					break;
+				}
+			}
+		}
+	}
+
+end:
+	/* report the recorded failure instead of an unconditional success */
+	return ret;
+}
+
+/*
+ * Free the DSM, source and destination buffers of the context and clear the
+ * pointers to them.
+ * Returns 0, -EINVAL when dev is NULL or -ENOENT when the device has no
+ * private data.
+ */
+static int he_lbk_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_ctx *ctx;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->priv == NULL)
+		return -ENOENT;
+
+	ctx = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
+
+	/* status_ptr points into the DSM buffer, so it is cleared with it */
+	rte_free(ctx->dsm_ptr);
+	ctx->dsm_ptr = NULL;
+	ctx->status_ptr = NULL;
+
+	rte_free(ctx->src_ptr);
+	ctx->src_ptr = NULL;
+
+	rte_free(ctx->dest_ptr);
+	ctx->dest_ptr = NULL;
+
+	return 0;
+}
+
+/*
+ * Record the register base address and allocate the DSM, source and
+ * destination buffers together with their IO addresses. When any step fails
+ * everything allocated so far is released again.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data and -ENOMEM when a buffer cannot be allocated or has
+ * no valid IO address.
+ */
+static int he_lbk_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv;
+	struct he_lbk_ctx *ctx;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	ctx = &priv->he_lbk_ctx;
+	ctx->addr = (uint8_t *)dev->addr;
+
+	ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+	if (ctx->dsm_ptr == NULL)
+		goto release;
+	ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr);
+	if (ctx->dsm_iova == RTE_BAD_IOVA)
+		goto release;
+
+	ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (ctx->src_ptr == NULL)
+		goto release;
+	ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr);
+	if (ctx->src_iova == RTE_BAD_IOVA)
+		goto release;
+
+	ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (ctx->dest_ptr == NULL)
+		goto release;
+	ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr);
+	if (ctx->dest_iova == RTE_BAD_IOVA)
+		goto release;
+
+	/* the status structure lives at the start of the DSM buffer */
+	ctx->status_ptr = (struct he_lbk_dsm_status *)ctx->dsm_ptr;
+	return 0;
+
+release:
+	/* every failure above is an allocation or address lookup failure */
+	he_lbk_ctx_release(dev);
+	return -ENOMEM;
+}
+
+/*
+ * Allocate the private data of the device, unless it already has some, and
+ * initialize the test context.
+ * Returns -EINVAL when dev is NULL, -ENOMEM when the allocation fails,
+ * otherwise the result of he_lbk_ctx_init().
+ */
+static int he_lbk_init(struct afu_mf_rawdev *dev)
+{
+	void *priv;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->priv == NULL) {
+		priv = rte_zmalloc(NULL, sizeof(struct he_lbk_priv), 0);
+		if (priv == NULL)
+			return -ENOMEM;
+		dev->priv = priv;
+	}
+
+	return he_lbk_ctx_init(dev);
+}
+
+/*
+ * Validate a struct rte_pmd_afu_he_lbk_cfg and copy it into the private data
+ * of the device.
+ *
+ * Returns 0 on success, -ENOENT when the device has no private data and
+ * -EINVAL for NULL arguments, a config_size that differs from the size of
+ * the structure, a mode above NLB_MODE_TRPUT, a multi_cl other than 1, 2 or
+ * 4, or a begin/end pair outside MIN_CACHE_LINES..MAX_CACHE_LINES or with
+ * end below begin.
+ */
+static int he_lbk_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct rte_pmd_afu_he_lbk_cfg *cfg;
+	struct he_lbk_priv *priv;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_lbk_cfg))
+		return -EINVAL;
+
+	cfg = (struct rte_pmd_afu_he_lbk_cfg *)config;
+
+	if (cfg->mode > NLB_MODE_TRPUT)
+		return -EINVAL;
+
+	switch (cfg->multi_cl) {
+	case 1:
+	case 2:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (cfg->begin < MIN_CACHE_LINES || cfg->begin > MAX_CACHE_LINES)
+		return -EINVAL;
+	if (cfg->end < cfg->begin || cfg->end > MAX_CACHE_LINES)
+		return -EINVAL;
+
+	rte_memcpy(&priv->he_lbk_cfg, cfg, sizeof(priv->he_lbk_cfg));
+
+	return 0;
+}
+
+/*
+ * Release the test buffers and free the private data of the device.
+ * Returns -EINVAL when dev is NULL, otherwise 0.
+ */
+static int he_lbk_close(struct afu_mf_rawdev *dev)
+{
+	if (!dev)
+		return -EINVAL;
+
+	/* the buffers must go first: they are reached through dev->priv */
+	he_lbk_ctx_release(dev);
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+
+	return 0;
+}
+
+/*
+ * Print the addresses kept in the test context to f, or to stdout when f is
+ * NULL. Returns 0, -EINVAL when dev is NULL or -ENOENT when the device has
+ * no private data.
+ */
+static int he_lbk_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_lbk_priv *priv;
+	struct he_lbk_ctx *ctx;
+	FILE *out;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+	ctx = &priv->he_lbk_ctx;
+
+	fprintf(out, "addr:\t\t%p\n", (void *)ctx->addr);
+	fprintf(out, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
+	fprintf(out, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
+	fprintf(out, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
+	fprintf(out, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
+	fprintf(out, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
+	fprintf(out, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
+	fprintf(out, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
+
+	return 0;
+}
+
+/*
+ * Operations shared by he_lbk_drv and he_mem_lbk_drv; start, stop and reset
+ * are not implemented.
+ */
+static struct afu_mf_ops he_lbk_ops = {
+	.init = he_lbk_init,
+	.config = he_lbk_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_lbk_test,
+	.close = he_lbk_close,
+	.dump = he_lbk_dump,
+	.reset = NULL
+};
+
+/* Driver entry binding the HE-LBK UUID to he_lbk_ops */
+struct afu_mf_drv he_lbk_drv = {
+	.uuid = { HE_LBK_UUID_L, HE_LBK_UUID_H },
+	.ops = &he_lbk_ops
+};
+
+/* Driver entry binding the HE-MEM-LBK UUID to the same he_lbk_ops */
+struct afu_mf_drv he_mem_lbk_drv = {
+	.uuid = { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+	.ops = &he_lbk_ops
+};
diff --git a/drivers/raw/afu_mf/he_lbk.h b/drivers/raw/afu_mf/he_lbk.h
new file mode 100644
index 0000000..c2e8a29
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_LBK_H_
+#define _HE_LBK_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/*
+ * 128-bit AFU identifiers, split into low (_L) and high (_H) 64-bit halves.
+ * They are used to fill the .uuid member of the driver entries.
+ */
+#define HE_LBK_UUID_L      0xb94b12284c31e02b
+#define HE_LBK_UUID_H      0x56e203e9864f49a7
+#define HE_MEM_LBK_UUID_L  0xbb652a578330a8eb
+#define HE_MEM_LBK_UUID_H  0x8568ab4e6ba54616
+
+/* Driver entries defined in he_lbk.c */
+extern struct afu_mf_drv he_lbk_drv;
+extern struct afu_mf_drv he_mem_lbk_drv;
+
+/*
+ * HE-LBK & HE-MEM-LBK registers definition
+ *
+ * Values are byte offsets. CSR_AFU_DSM_BASEL/BASEH are 4 bytes apart; every
+ * other register listed here is at least 8 bytes from its neighbour.
+ */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+/* DSM area size in bytes (2 MiB) and polling parameters */
+#define DSM_SIZE           0x200000
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+/* Test buffer size in bytes (4 MiB) and the alignment requested for it */
+#define NLB_BUF_SIZE  0x400000
+#define TEST_MEM_ALIGN  1024
+
+/*
+ * 32-bit control register image. The same storage can be accessed as the
+ * raw value (csr) or through the bit-fields, whose widths add up to 32.
+ * NOTE(review): bit-field placement is compiler/ABI defined; the layout
+ * presumably assumes the first field lands in bit 0 - confirm per target.
+ */
+struct he_lbk_csr_ctl {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t reset:1;
+			uint32_t start:1;
+			uint32_t force_completion:1;
+			uint32_t reserved:29;
+		};
+	};
+};
+
+/*
+ * 32-bit configuration register image, accessible as the raw value (csr)
+ * or through bit-fields (1+1+3+2+13+3+5+1+1+2 = 32 bits). The rsvd*
+ * members are padding between the named fields.
+ * NOTE(review): bit-field placement is compiler/ABI defined - confirm the
+ * intended bit order for every supported target.
+ */
+struct he_lbk_csr_cfg {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t rsvd1:1;
+			uint32_t cont:1;
+			uint32_t mode:3;
+			uint32_t multicl_len:2;
+			uint32_t rsvd2:13;
+			uint32_t trput_interleave:3;
+			uint32_t test_cfg:5;
+			uint32_t interrupt_on_error:1;
+			uint32_t interrupt_testmode:1;
+			uint32_t rsvd3:2;
+		};
+	};
+};
+
+/*
+ * 64-bit status register image: the raw value (csr) overlays two 32-bit
+ * counters, num_writes first and num_reads second in memory.
+ */
+struct he_lbk_status0 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_writes;
+			uint32_t num_reads;
+		};
+	};
+};
+
+/*
+ * 64-bit status register image: the raw value (csr) overlays two 32-bit
+ * pending-request counters, num_pend_writes first and num_pend_reads second
+ * in memory.
+ */
+struct he_lbk_status1 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_pend_writes;
+			uint32_t num_pend_reads;
+		};
+	};
+};
+
+/*
+ * Test status record (32 bytes of members: completion and error words, a
+ * 64-bit clock count, read/write counts and start/end overhead counts).
+ * NOTE(review): presumably this mirrors what the AFU writes into the DSM
+ * area, so member order and sizes must not change - confirm against the
+ * hardware specification.
+ */
+struct he_lbk_dsm_status {
+	uint32_t test_complete;
+	uint32_t test_error;
+	uint64_t num_clocks;
+	uint32_t num_reads;
+	uint32_t num_writes;
+	uint32_t start_overhead;
+	uint32_t end_overhead;
+};
+
+/*
+ * Runtime context of an HE-LBK device. Each buffer is tracked both by a
+ * CPU pointer (*_ptr) and by a 64-bit IOVA (*_iova); he_lbk_dump() prints
+ * all of these fields.
+ */
+struct he_lbk_ctx {
+	uint8_t *addr;
+	uint8_t *dsm_ptr;
+	uint64_t dsm_iova;
+	uint8_t *src_ptr;
+	uint64_t src_iova;
+	uint8_t *dest_ptr;
+	uint64_t dest_iova;
+	struct he_lbk_dsm_status *status_ptr;
+};
+
+/*
+ * Per-device private data stored in afu_mf_rawdev::priv: the user supplied
+ * test configuration followed by the runtime context.
+ */
+struct he_lbk_priv {
+	struct rte_pmd_afu_he_lbk_cfg he_lbk_cfg;
+	struct he_lbk_ctx he_lbk_ctx;
+};
+
+#endif /* _HE_LBK_H_ */
diff --git a/drivers/raw/afu_mf/he_mem.c b/drivers/raw/afu_mf/he_mem.c
new file mode 100644
index 0000000..ccbb3a8
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_mem.h"
+
+/*
+ * Run the memory traffic generator test.
+ *
+ * The scratchpad register is checked by write/read-back, the channels
+ * selected by the configured mask (ANDed with the value read from
+ * MEM_TG_CTRL) are started, and MEM_TG_STAT is polled every
+ * MEM_TG_POLL_INTERVAL_MS until no started channel is active or
+ * MEM_TG_TIMEOUT_MS has elapsed. The result of each finished channel is
+ * printed to stdout.
+ *
+ * Returns 0 when every started channel finished, -EINVAL when dev is NULL,
+ * -ENOENT when the device has no private data, -EIO when the scratchpad
+ * check fails, and the mask of still-active channels on timeout.
+ */
+static int he_mem_tg_test(struct afu_mf_rawdev *dev)
+{
+	struct he_mem_tg_priv *priv;
+	struct rte_pmd_afu_he_mem_tg_cfg *cfg;
+	struct he_mem_tg_ctx *ctx;
+	const char *result;
+	uint64_t value = 0x12345678;
+	uint64_t cap;
+	uint64_t pending;
+	int ch, elapsed;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	cfg = &priv->he_mem_tg_cfg;
+	ctx = &priv->he_mem_tg_ctx;
+
+	AFU_MF_PMD_DEBUG("Channel mask: 0x%x", cfg->channel_mask);
+
+	/* the scratchpad must read back exactly what was written */
+	rte_write64(value, ctx->addr + MEM_TG_SCRATCHPAD);
+	cap = rte_read64(ctx->addr + MEM_TG_SCRATCHPAD);
+	AFU_MF_PMD_DEBUG("Scratchpad value: 0x%"PRIx64, cap);
+	if (cap != value) {
+		AFU_MF_PMD_ERR("Test scratchpad register failed");
+		return -EIO;
+	}
+
+	cap = rte_read64(ctx->addr + MEM_TG_CTRL);
+	AFU_MF_PMD_DEBUG("Capability: 0x%"PRIx64, cap);
+
+	/* start only the requested channels that the control register reports */
+	pending = cfg->channel_mask & cap;
+	rte_write64(pending, ctx->addr + MEM_TG_CTRL);
+
+	/* poll until every started channel has left the active state */
+	for (elapsed = 0; elapsed < MEM_TG_TIMEOUT_MS;
+		elapsed += MEM_TG_POLL_INTERVAL_MS) {
+		value = rte_read64(ctx->addr + MEM_TG_STAT);
+		for (ch = 0; ch < NUM_MEM_TG_CHANNELS; ch++) {
+			if (!(pending & (1 << ch)))
+				continue;
+			if (TGACTIVE(value, ch))
+				continue;
+
+			if (TGPASS(value, ch))
+				result = "pass";
+			else if (TGTIMEOUT(value, ch))
+				result = "timeout";
+			else if (TGFAIL(value, ch))
+				result = "fail";
+			else
+				result = "error";
+			printf("TG channel %d test %s\n", ch, result);
+			pending &= ~(1 << ch);
+		}
+		if (!pending)
+			break;
+		rte_delay_ms(MEM_TG_POLL_INTERVAL_MS);
+	}
+
+	if (pending) {
+		AFU_MF_PMD_ERR("Timeout 0x%04lx", (unsigned long)value);
+		return pending;
+	}
+
+	return 0;
+}
+
+/*
+ * Initialise a MEM-TG device: allocate zeroed private data on first use and
+ * record the device address in the context.
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOMEM when the private
+ * data cannot be allocated.
+ */
+static int he_mem_tg_init(struct afu_mf_rawdev *dev)
+{
+	struct he_mem_tg_priv *priv;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* existing private data is reused, not reallocated */
+	if (dev->priv == NULL) {
+		dev->priv = rte_zmalloc(NULL, sizeof(struct he_mem_tg_priv), 0);
+		if (dev->priv == NULL)
+			return -ENOMEM;
+	}
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	priv->he_mem_tg_ctx.addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+/*
+ * Store a new MEM-TG configuration.
+ * config must point to a struct rte_pmd_afu_he_mem_tg_cfg and config_size
+ * must equal its size; the structure is copied into the private data.
+ * Returns 0 on success, -EINVAL on a NULL/empty argument or a size
+ * mismatch, -ENOENT when the device has no private data.
+ */
+static int he_mem_tg_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_mem_tg_priv *priv;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	/* he_mem_tg_cfg has exactly the type the caller must pass */
+	if (config_size != sizeof(priv->he_mem_tg_cfg))
+		return -EINVAL;
+
+	rte_memcpy(&priv->he_mem_tg_cfg, config, sizeof(priv->he_mem_tg_cfg));
+	return 0;
+}
+
+/*
+ * Close a MEM-TG device by freeing its private data and clearing the
+ * pointer.
+ * Returns 0 on success, -EINVAL when dev is NULL.
+ */
+static int he_mem_tg_close(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+	return 0;
+}
+
+/*
+ * Print the MEM-TG context (its addr field) to f, or to stdout when f is
+ * NULL.
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data.
+ */
+static int he_mem_tg_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_mem_tg_priv *priv;
+	FILE *out;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+	fprintf(out, "addr:\t\t%p\n", (void *)priv->he_mem_tg_ctx.addr);
+
+	return 0;
+}
+
+/* Callback table of the MEM-TG driver. */
+static struct afu_mf_ops he_mem_tg_ops = {
+	/* callbacks implemented by this driver */
+	.init = he_mem_tg_init,
+	.config = he_mem_tg_config,
+	.test = he_mem_tg_test,
+	.close = he_mem_tg_close,
+	.dump = he_mem_tg_dump,
+	/* callbacks left unset */
+	.start = NULL,
+	.stop = NULL,
+	.reset = NULL,
+};
+
+/* Driver entry matching the MEM-TG UUID (low word first, then high word). */
+struct afu_mf_drv he_mem_tg_drv = {
+	.ops = &he_mem_tg_ops,
+	.uuid = { HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+};
diff --git a/drivers/raw/afu_mf/he_mem.h b/drivers/raw/afu_mf/he_mem.h
new file mode 100644
index 0000000..82404b6
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_MEM_H_
+#define _HE_MEM_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+/* AFU identifier of MEM-TG, split into low and high 64-bit halves */
+#define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
+#define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
+
+/*
+ * Number of traffic generator channels examined by the test, and the
+ * overall timeout / polling period (both in milliseconds) of its status
+ * loop.
+ */
+#define NUM_MEM_TG_CHANNELS      4
+#define MEM_TG_TIMEOUT_MS     5000
+#define MEM_TG_POLL_INTERVAL_MS 10
+
+/* Driver entry defined in he_mem.c */
+extern struct afu_mf_drv he_mem_tg_drv;
+
+/*
+ * MEM-TG registers definition
+ *
+ * MEM_TG_CTRL carries one bit per channel (TGCONTROL). MEM_TG_STAT carries
+ * one 4-bit group per channel n, starting at bit (n * 4):
+ *   bit 0 active, bit 1 timeout, bit 2 fail, bit 3 pass.
+ * Every macro argument is parenthesised so that expressions such as
+ * "a | b" can be passed as the channel number.
+ */
+#define MEM_TG_SCRATCHPAD   0x28
+#define MEM_TG_CTRL         0x30
+#define   TGCONTROL(n)      (1 << (n))
+#define MEM_TG_STAT         0x38
+#define   TGSTATUS(v, n)    (((v) >> ((n) << 2)) & 0xf)
+#define   TGPASS(v, n)      (((v) >> (((n) << 2) + 3)) & 0x1)
+#define   TGFAIL(v, n)      (((v) >> (((n) << 2) + 2)) & 0x1)
+#define   TGTIMEOUT(v, n)   (((v) >> (((n) << 2) + 1)) & 0x1)
+#define   TGACTIVE(v, n)    (((v) >> ((n) << 2)) & 0x1)
+
+/*
+ * Runtime context of a MEM-TG device. addr is the base that the
+ * MEM_TG_* register offsets are added to for register access.
+ */
+struct he_mem_tg_ctx {
+	uint8_t *addr;
+};
+
+/*
+ * Per-device private data stored in afu_mf_rawdev::priv: the user supplied
+ * configuration followed by the runtime context.
+ */
+struct he_mem_tg_priv {
+	struct rte_pmd_afu_he_mem_tg_cfg he_mem_tg_cfg;
+	struct he_mem_tg_ctx he_mem_tg_ctx;
+};
+
+#endif /* _HE_MEM_H_ */
diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
new file mode 100644
index 0000000..f304bc8
--- /dev/null
+++ b/drivers/raw/afu_mf/meson.build
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2022 Intel Corporation
+
+# Libraries/drivers this driver is built against.
+deps += ['rawdev', 'bus_pci', 'bus_ifpga']
+# One source file per supported AFU type plus the common rawdev glue.
+sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c', 'he_mem.c',
+	'he_hssi.c')
+
+# Header listed for installation.
+headers = files('rte_pmd_afu.h')
diff --git a/drivers/raw/afu_mf/n3000_afu.c b/drivers/raw/afu_mf/n3000_afu.c
new file mode 100644
index 0000000..19d7c54
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.c
@@ -0,0 +1,2005 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+
+/*
+ * Program the NLB CSR_CFG register from the stored test configuration.
+ *
+ * The register image is built from priv->nlb_cfg (continuous mode, write
+ * cache policy, read cache hint, test mode, channel selection and
+ * multi-cacheline length) and written with a single 32-bit access.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data.
+ */
+static int nlb_afu_config(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_csr_cfg v;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->nlb_cfg;
+
+	v.csr = 0;
+
+	if (cfg->cont)
+		v.cont = 1;
+
+	/*
+	 * NLB_WRPUSH_I has its own enable bit; any other policy is written to
+	 * wrthru_en. wrthru_en must not be assigned again below, otherwise the
+	 * NLB_WRPUSH_I case would be overridden.
+	 */
+	if (cfg->cache_policy == NLB_WRPUSH_I)
+		v.wrpush_i = 1;
+	else
+		v.wrthru_en = cfg->cache_policy;
+
+	/* the mixed read hint is encoded as 3 in rdsel */
+	if (cfg->cache_hint == NLB_RDLINE_MIXED)
+		v.rdsel = 3;
+	else
+		v.rdsel = cfg->cache_hint;
+
+	v.mode = cfg->mode;
+	v.chsel = cfg->read_vc;
+	v.wr_chsel = cfg->write_vc;
+	v.wrfence_chsel = cfg->wrfence_vc;
+	/* the register field holds the multi-cacheline count minus one */
+	v.multicl_len = cfg->multi_cl - 1;
+
+	AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr);
+	rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
+
+	return 0;
+}
+
+/*
+ * Print the read/write counters and the derived bandwidth of the last NLB
+ * run for a transfer of cl cache lines.
+ *
+ * The clock count is reduced by the start overhead, and additionally by the
+ * end overhead when the test was not run in continuous mode. A configured
+ * frequency of 0 is replaced (and stored) as 200 MHz. Does nothing when dev
+ * or its private data is NULL.
+ */
+static void nlb_afu_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+	struct n3000_afu_priv *priv;
+	struct rte_pmd_afu_nlb_cfg *cfg;
+	struct nlb_dsm_status *stat;
+	uint64_t ticks;
+	double reads, writes, rd_bw, wr_bw;
+
+	if (dev == NULL || dev->priv == NULL)
+		return;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->nlb_cfg;
+	stat = priv->nlb_ctx.status_ptr;
+
+	/* remove the measurement overhead from the raw clock count */
+	if (cfg->cont)
+		ticks = stat->num_clocks - stat->start_overhead;
+	else
+		ticks = stat->num_clocks -
+			(stat->start_overhead + stat->end_overhead);
+
+	/* fall back to a default frequency when none was configured */
+	if (cfg->freq_mhz == 0)
+		cfg->freq_mhz = 200;
+
+	reads = (double)stat->num_reads;
+	rd_bw = (reads * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+	writes = (double)stat->num_writes;
+	wr_bw = (writes * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+
+	printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
+		"Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
+	printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n",
+		cl, stat->num_reads, stat->num_writes, ticks,
+		rd_bw / 1e9, wr_bw / 1e9);
+}
+
+/*
+ * Run the NLB test for every transfer size from cfg->begin to cfg->end
+ * cache lines, stepping by cfg->multi_cl.
+ *
+ * For each size the destination and DSM buffers are cleared, the AFU is
+ * reset and started, completion is awaited through the DSM status word, a
+ * report is printed and the destination buffer is compared with the
+ * generated source pattern.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the device
+ * has no private data, the error of nlb_afu_config(), or the error of
+ * dsm_poll_timeout() when the AFU does not signal completion.
+ *
+ * NOTE(review): a data mismatch is only logged and does not change the
+ * return value; the compare is performed for every cfg->mode - confirm
+ * whether this is intended for modes that do not copy src to dest.
+ */
+static int nlb_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct nlb_afu_ctx *ctx = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	ctx = &priv->nlb_ctx;
+	cfg = &priv->nlb_cfg;
+
+	/* initialize registers */
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
+
+	/* control register is written as 0 first, then with the reset bit set */
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	/* buffer addresses are programmed after conversion with SIZE_TO_CLS */
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
+
+	ret = nlb_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data */
+	/* every 32-bit word holds its own index, up to cfg->end cache lines */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CLS_TO_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		/* same 0-then-reset sequence as during initialisation */
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		rte_write32(cl, ctx->addr + CSR_NUM_LINES);
+
+		rte_delay_us(10);
+
+		/* reset bit stays set while the start bit is added */
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			/*
+			 * continuous mode: run for cfg->timeout * 1000 ms, then
+			 * force completion and wait for the completion flag
+			 */
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+		} else {
+			/*
+			 * one-shot mode: wait for the completion flag first and
+			 * set force_completion afterwards
+			 */
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		nlb_afu_report(dev, cl);
+
+		/* wait up to 100 x 1 ms for CSR_STATUS1 to read back as zero */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		/* destination must contain the index pattern written to src */
+		ptr = (uint32_t *)ctx->dest_ptr;
+		j = CLS_TO_SIZE(cl) >> 2;
+		for (i = 0; i < j; i++) {
+			if (*ptr++ != i) {
+				AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+				break;
+			}
+		}
+	}
+
+end:
+	return ret;
+}
+
+/*
+ * Free every DMA bounce buffer plus the data and reference buffers of ctx
+ * and reset the pointers to NULL. Does nothing when ctx is NULL.
+ */
+static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
+{
+	int idx;
+
+	if (ctx == NULL)
+		return;
+
+	for (idx = 0; idx < NUM_DMA_BUF; idx++) {
+		rte_free(ctx->dma_buf[idx]);
+		ctx->dma_buf[idx] = NULL;
+	}
+
+	rte_free(ctx->data_buf);
+	rte_free(ctx->ref_buf);
+	ctx->data_buf = NULL;
+	ctx->ref_buf = NULL;
+}
+
+/*
+ * Allocate the buffers used by the DMA test.
+ *
+ * NUM_DMA_BUF zeroed buffers of cfg->size bytes are allocated with
+ * TEST_MEM_ALIGN alignment and their IOVAs recorded; the data and
+ * reference buffers of cfg->length bytes are allocated page aligned.
+ * On any failure everything is released through dma_afu_buf_free().
+ *
+ * Returns 0 on success, -EINVAL when ctx or cfg is NULL, -ENOMEM when an
+ * allocation or an IOVA lookup fails.
+ */
+static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
+	struct rte_pmd_afu_dma_cfg *cfg)
+{
+	size_t page_sz = sysconf(_SC_PAGE_SIZE);
+	int idx;
+
+	if (ctx == NULL || cfg == NULL)
+		return -EINVAL;
+
+	for (idx = 0; idx < NUM_DMA_BUF; idx++) {
+		ctx->dma_buf[idx] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
+			TEST_MEM_ALIGN);
+		if (ctx->dma_buf[idx] == NULL)
+			goto nomem;
+
+		ctx->dma_iova[idx] = rte_malloc_virt2iova(ctx->dma_buf[idx]);
+		if (ctx->dma_iova[idx] == RTE_BAD_IOVA)
+			goto nomem;
+	}
+
+	ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz);
+	if (ctx->data_buf == NULL)
+		goto nomem;
+
+	ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz);
+	if (ctx->ref_buf == NULL)
+		goto nomem;
+
+	return 0;
+
+nomem:
+	/* releases whatever was allocated so far */
+	dma_afu_buf_free(ctx);
+	return -ENOMEM;
+}
+
+/*
+ * Fill the reference buffer with test data and copy it to the data buffer.
+ *
+ * A non-zero ctx->pattern is replicated over size bytes; otherwise
+ * size / 4 words are filled from rand() after seeding with the fixed value
+ * 99, which makes the sequence reproducible. Does nothing when ctx is NULL
+ * or size is 0.
+ */
+static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
+{
+	int *words;
+	size_t idx, nwords;
+
+	if (ctx == NULL || size == 0)
+		return;
+
+	words = (int *)ctx->ref_buf;
+
+	if (ctx->pattern) {
+		memset(words, ctx->pattern, size);
+	} else {
+		srand(99);
+		nwords = size >> 2;
+		for (idx = 0; idx < nwords; idx++)
+			words[idx] = rand();
+	}
+
+	rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
+}
+
+/*
+ * Compare the first size bytes of the data buffer with the reference
+ * buffer and print the verdict to stdout.
+ *
+ * In verbose mode the differing bytes are listed; the scan stops once
+ * ERR_CHECK_LIMIT differences have been counted.
+ *
+ * Returns 0 when the buffers match, -1 when they differ, -EINVAL when ctx
+ * is NULL or size is 0.
+ */
+static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
+{
+	const uint8_t *expected;
+	const uint8_t *actual;
+	size_t pos;
+	int errors = 0;
+
+	if (ctx == NULL || size == 0)
+		return -EINVAL;
+
+	expected = (uint8_t *)ctx->ref_buf;
+	actual = (uint8_t *)ctx->data_buf;
+
+	if (memcmp(expected, actual, size) == 0) {
+		printf("Transfer is verified\n");
+		return 0;
+	}
+
+	printf("Transfer is corrupted\n");
+	if (!ctx->verbose)
+		return -1;
+
+	for (pos = 0; pos < size; pos++) {
+		if (expected[pos] == actual[pos])
+			continue;
+		/* the difference that reaches the limit is counted, not printed */
+		if (++errors >= ERR_CHECK_LIMIT)
+			break;
+		printf("Mismatch at 0x%zx, "
+			"Expected %02x  Actual %02x\n",
+			pos, expected[pos], actual[pos]);
+	}
+
+	if (errors < ERR_CHECK_LIMIT) {
+		printf("Found %d error bytes\n", errors);
+	} else {
+		printf("......\n");
+		printf("Found more than %d error bytes\n", errors);
+	}
+
+	return -1;
+}
+
+/*
+ * Copy bytes / 8 quad-words from host memory to the device with 64-bit
+ * register writes. Nothing is written when dev_addr or bytes is not
+ * quad-word aligned.
+ */
+static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+	uint64_t nqwords = bytes / sizeof(uint64_t);
+	uint64_t idx;
+
+	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
+		!IS_ALIGNED_QWORD((uint64_t)bytes))
+		return;
+
+	for (idx = 0; idx < nqwords; idx++)
+		rte_write64(host_addr[idx], &dev_addr[idx]);
+}
+
+/*
+ * Copy bytes / 8 quad-words from the device to host memory with 64-bit
+ * register reads. Nothing is read when dev_addr or bytes is not quad-word
+ * aligned.
+ */
+static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+	uint64_t nqwords = bytes / sizeof(uint64_t);
+	uint64_t idx;
+
+	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
+		!IS_ALIGNED_QWORD((uint64_t)bytes))
+		return;
+
+	for (idx = 0; idx < nqwords; idx++)
+		host_addr[idx] = rte_read64(&dev_addr[idx]);
+}
+
+/*
+ * Select the ASE window page containing addr. The page register is only
+ * written when the page differs from the one cached in ctx->cur_ase_page.
+ * Does nothing when ctx is NULL.
+ */
+static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
+{
+	uint64_t page;
+
+	if (ctx == NULL)
+		return;
+
+	page = addr & ~DMA_ASE_WINDOW_MASK;
+	if (page == ctx->cur_ase_page)
+		return;
+
+	rte_write64(page, ctx->ase_ctrl_addr);
+	ctx->cur_ase_page = page;
+}
+
+/*
+ * Write fewer than QWORD_BYTES bytes from host memory to an arbitrary
+ * device address through the ASE window.
+ *
+ * The quad-word containing dev_addr is read, the affected bytes are
+ * replaced and the quad-word is written back.
+ *
+ * Returns 0 on success (including count == 0), -EINVAL when ctx is NULL or
+ * count is not smaller than QWORD_BYTES.
+ */
+static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+	uint64_t host_addr, uint32_t count)
+{
+	uintptr_t host = (uintptr_t)host_addr;  /* transfer to pointer size */
+	uint64_t byte_in_qword;
+	uint64_t win_off;
+	uint64_t qword;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr,
+		dev_addr, count);
+
+	if (ctx == NULL || count >= QWORD_BYTES)
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	switch_ase_page(ctx, dev_addr);
+
+	byte_in_qword = dev_addr % QWORD_BYTES;
+	win_off = (dev_addr - byte_in_qword) & DMA_ASE_WINDOW_MASK;
+
+	/* read-modify-write of the enclosing quad-word */
+	qword = rte_read64(ctx->ase_data_addr + win_off);
+	rte_memcpy(((char *)(&qword)) + byte_in_qword, (void *)host, count);
+	rte_write64(qword, ctx->ase_data_addr + win_off);
+
+	return 0;
+}
+
+/*
+ * Copy host memory to the device through the ASE window using 4- and
+ * 8-byte register writes.
+ *
+ * dst_ptr, src_ptr and count are in/out cursors: on return they describe
+ * what is left after all whole DWORD/QWORD pieces have been written; the
+ * caller handles the remaining (< DWORD_BYTES) bytes. When fewer than
+ * DWORD_BYTES were requested nothing is written and the cursors are left
+ * untouched.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL or the device address is
+ * not DWORD aligned.
+ */
+static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+	uint64_t *src_ptr, uint64_t *count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t align_bytes = *count;
+	uint64_t offset = 0;
+	uint64_t left_in_page = 0;
+	uint64_t size_to_copy = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		align_bytes);
+
+	if (!ctx || !IS_ALIGNED_DWORD(dst))
+		return -EINVAL;
+
+	if (align_bytes < DWORD_BYTES)
+		return 0;
+
+	if (!IS_ALIGNED_QWORD(dst)) {
+		/* Write out a single DWORD to get QWORD aligned */
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+
+		rte_write32(*(uint32_t *)(uintptr_t)src,
+			ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/*
+	 * Write out blocks of 64-bit values. The room left in the window is
+	 * recomputed from the current address on every pass, so a transfer
+	 * that crosses a window boundary continues in the next page instead
+	 * of stopping early.
+	 */
+	while (align_bytes >= QWORD_BYTES) {
+		offset = dst & DMA_ASE_WINDOW_MASK;
+		left_in_page = DMA_ASE_WINDOW - offset;
+		size_to_copy =
+			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+		if (size_to_copy < QWORD_BYTES)
+			break;
+		switch_ase_page(ctx, dst);
+		blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
+			(uint64_t *)(uintptr_t)src, size_to_copy);
+		src += size_to_copy;
+		dst += size_to_copy;
+		align_bytes -= size_to_copy;
+	}
+
+	if (align_bytes >= DWORD_BYTES) {
+		/* Write out remaining DWORD */
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+		rte_write32(*(uint32_t *)(uintptr_t)src,
+			ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/* always report progress, also when the first DWORD used up everything */
+	*src_ptr = src;
+	*dst_ptr = dst;
+	*count = align_bytes;
+
+	return 0;
+}
+
+/*
+ * Copy count bytes from host memory to the device through the ASE window.
+ *
+ * Three phases: a read-modify-write for leading bytes when the device
+ * address is not DWORD aligned, 4/8-byte register writes for the bulk, and
+ * a read-modify-write for the trailing bytes. dst_ptr and src_ptr are
+ * advanced past the copied data on success.
+ *
+ * Returns 0 on success or the error of the failing helper.
+ */
+static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+	uint64_t *src_ptr, uint64_t count)
+{
+	uint64_t dst = *dst_ptr;
+	uint64_t src = *src_ptr;
+	uint64_t remaining = count;
+	uint64_t chunk = 0;
+	int ret;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		count);
+
+	/* aligns address to 8 byte using dst masking method */
+	if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) {
+		chunk = QWORD_BYTES - (dst % QWORD_BYTES);
+		if (chunk > remaining)
+			chunk = remaining;
+		ret = ase_write_unaligned(ctx, dst, src, chunk);
+		if (ret)
+			return ret;
+		remaining -= chunk;
+		src += chunk;
+		dst += chunk;
+	}
+
+	/* Handles 8/4 byte MMIO transfer */
+	ret = ase_write(ctx, &dst, &src, &remaining);
+	if (ret)
+		return ret;
+
+	/* Left over unaligned bytes transferred using dst masking method */
+	chunk = QWORD_BYTES - (dst % QWORD_BYTES);
+	if (chunk > remaining)
+		chunk = remaining;
+
+	ret = ase_write_unaligned(ctx, dst, src, chunk);
+	if (ret)
+		return ret;
+
+	*dst_ptr = dst + chunk;
+	*src_ptr = src + chunk;
+
+	return 0;
+}
+
+/*
+ * Read fewer than QWORD_BYTES bytes from an arbitrary device address into
+ * host memory through the ASE window.
+ *
+ * The quad-word containing dev_addr is read and the requested bytes are
+ * copied out of it.
+ *
+ * Returns 0 on success (including count == 0), -EINVAL when ctx is NULL or
+ * count is not smaller than QWORD_BYTES.
+ */
+static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+	uint64_t host_addr, uint32_t count)
+{
+	uintptr_t host = (uintptr_t)host_addr;  /* transfer to pointer size */
+	uint64_t byte_in_qword;
+	uint64_t win_off;
+	uint64_t qword;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr,
+		dev_addr, count);
+
+	if (ctx == NULL || count >= QWORD_BYTES)
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	switch_ase_page(ctx, dev_addr);
+
+	byte_in_qword = dev_addr % QWORD_BYTES;
+	win_off = (dev_addr - byte_in_qword) & DMA_ASE_WINDOW_MASK;
+	qword = rte_read64(ctx->ase_data_addr + win_off);
+	rte_memcpy((void *)host, ((char *)(&qword)) + byte_in_qword, count);
+
+	return 0;
+}
+
+/*
+ * Copy device memory to the host through the ASE window using 4- and
+ * 8-byte register reads.
+ *
+ * src_ptr, dst_ptr and count are in/out cursors: on return they describe
+ * what is left after all whole DWORD/QWORD pieces have been read; the
+ * caller handles the remaining (< DWORD_BYTES) bytes. When fewer than
+ * DWORD_BYTES were requested nothing is read and the cursors are left
+ * untouched.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL or the device address is
+ * not DWORD aligned.
+ */
+static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+	uint64_t *dst_ptr, uint64_t *count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t align_bytes = *count;
+	uint64_t offset = 0;
+	uint64_t left_in_page = 0;
+	uint64_t size_to_copy = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")", dst, src,
+		align_bytes);
+
+	if (!ctx || !IS_ALIGNED_DWORD(src))
+		return -EINVAL;
+
+	if (align_bytes < DWORD_BYTES)
+		return 0;
+
+	if (!IS_ALIGNED_QWORD(src)) {
+		/* Read a single DWORD to get QWORD aligned */
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		*(uint32_t *)(uintptr_t)dst =
+			rte_read32(ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/*
+	 * Read blocks of 64-bit values. The room left in the window is
+	 * recomputed from the current address on every pass, so a transfer
+	 * that crosses a window boundary continues in the next page instead
+	 * of stopping early.
+	 */
+	while (align_bytes >= QWORD_BYTES) {
+		offset = src & DMA_ASE_WINDOW_MASK;
+		left_in_page = DMA_ASE_WINDOW - offset;
+		size_to_copy =
+			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+		if (size_to_copy < QWORD_BYTES)
+			break;
+		switch_ase_page(ctx, src);
+		blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
+			(uint64_t *)(uintptr_t)dst, size_to_copy);
+		src += size_to_copy;
+		dst += size_to_copy;
+		align_bytes -= size_to_copy;
+	}
+
+	if (align_bytes >= DWORD_BYTES) {
+		/* Read remaining DWORD */
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		*(uint32_t *)(uintptr_t)dst =
+			rte_read32(ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/* always report progress, also when the first DWORD used up everything */
+	*src_ptr = src;
+	*dst_ptr = dst;
+	*count = align_bytes;
+
+	return 0;
+}
+
+/*
+ * Copy count bytes from the device to host memory through the ASE window.
+ *
+ * Three phases: an extract-from-quad-word read for leading bytes when the
+ * device address is not DWORD aligned, 4/8-byte register reads for the
+ * bulk, and an extract-from-quad-word read for the trailing bytes. src_ptr
+ * and dst_ptr are advanced past the copied data on success.
+ *
+ * Returns 0 on success or the error of the failing helper.
+ */
+static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+	uint64_t *dst_ptr, uint64_t count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t remaining = count;
+	uint64_t chunk = 0;
+	int ret;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		count);
+
+	/* Aligns address to 8 byte using src masking method */
+	if (!IS_ALIGNED_DWORD(src) && !IS_ALIGNED_QWORD(src)) {
+		chunk = QWORD_BYTES - (src % QWORD_BYTES);
+		if (chunk > remaining)
+			chunk = remaining;
+		ret = ase_read_unaligned(ctx, src, dst, chunk);
+		if (ret)
+			return ret;
+		remaining -= chunk;
+		dst += chunk;
+		src += chunk;
+	}
+
+	/* Handles 8/4 byte MMIO transfer */
+	ret = ase_read(ctx, &src, &dst, &remaining);
+	if (ret)
+		return ret;
+
+	/* Left over unaligned bytes transferred using src masking method */
+	chunk = QWORD_BYTES - (src % QWORD_BYTES);
+	if (chunk > remaining)
+		chunk = remaining;
+
+	ret = ase_read_unaligned(ctx, src, dst, chunk);
+	if (ret)
+		return ret;
+
+	*dst_ptr = dst + chunk;
+	*src_ptr = src + chunk;
+
+	return 0;
+}
+
+/*
+ * Acknowledge a DMA interrupt: a status image with only the IRQ bit set is
+ * written to the status register. Does nothing when ctx is NULL.
+ */
+static void clear_interrupt(struct dma_afu_ctx *ctx)
+{
+	msgdma_status ack;
+
+	if (ctx == NULL)
+		return;
+
+	/* clear interrupt by writing 1 to IRQ bit in status register */
+	ack.csr = 0;
+	ack.irq = 1;
+	rte_write32(ack.csr, CSR_STATUS(ctx->csr_addr));
+}
+
+/*
+ * Wait up to DMA_TIMEOUT_MSEC for the DMA interrupt signalled through
+ * ctx->event_fd, consume the event counter and acknowledge the interrupt
+ * in the device.
+ *
+ * The interrupt is acknowledged on every path that got as far as polling,
+ * including errors and timeouts.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL or has no event fd,
+ * -EFAULT when poll() fails, -ETIMEDOUT when no event arrives in time and
+ * -EIO when the event counter cannot be read.
+ */
+static int poll_interrupt(struct dma_afu_ctx *ctx)
+{
+	struct pollfd pfd = {0};
+	uint64_t count = 0;
+	ssize_t bytes_read = 0;
+	int poll_ret = 0;
+	int ret = 0;
+
+	if (!ctx || (ctx->event_fd < 0))
+		return -EINVAL;
+
+	pfd.fd = ctx->event_fd;
+	pfd.events = POLLIN;
+	poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
+	if (poll_ret < 0) {
+		AFU_MF_PMD_ERR("Error %s", strerror(errno));
+		ret = -EFAULT;
+	} else if (poll_ret == 0) {
+		AFU_MF_PMD_ERR("Timeout");
+		ret = -ETIMEDOUT;
+	} else {
+		bytes_read = read(pfd.fd, &count, sizeof(count));
+		if (bytes_read > 0) {
+			if (ctx->verbose)
+				AFU_MF_PMD_DEBUG("Successful, ret %d, cnt %"PRIu64,
+					poll_ret, count);
+			ret = 0;
+		} else {
+			/* errno is only meaningful when read() returned -1 */
+			AFU_MF_PMD_ERR("Failed %s", bytes_read < 0 ?
+				strerror(errno) : "zero bytes read");
+			ret = -EIO;
+		}
+	}
+
+	clear_interrupt(ctx);
+	return ret;
+}
+
+/*
+ * Hand one extended descriptor to the DMA engine.
+ *
+ * The descriptor is logged in verbose mode. The function then spins until
+ * the status register no longer reports a full descriptor buffer (logging
+ * a message roughly every 100000000 polls) and finally copies the
+ * descriptor to the descriptor port with 64-bit writes. Does nothing when
+ * ctx is NULL.
+ */
+static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc)
+{
+	msgdma_status status;
+	uint64_t retries = 0;
+
+	if (ctx == NULL)
+		return;
+
+	if (ctx->verbose) {
+		AFU_MF_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
+			desc->rd_address_ext, desc->rd_address);
+		AFU_MF_PMD_DEBUG("descriptor.wr_address = 0x%x%08x",
+			desc->wr_address_ext, desc->wr_address);
+		AFU_MF_PMD_DEBUG("descriptor.len = %u", desc->len);
+		AFU_MF_PMD_DEBUG("descriptor.wr_burst_count = %u",
+			desc->wr_burst_count);
+		AFU_MF_PMD_DEBUG("descriptor.rd_burst_count = %u",
+			desc->rd_burst_count);
+		AFU_MF_PMD_DEBUG("descriptor.wr_stride %u", desc->wr_stride);
+		AFU_MF_PMD_DEBUG("descriptor.rd_stride %u", desc->rd_stride);
+	}
+
+	/* busy-wait for a free slot in the descriptor buffer */
+	for (;;) {
+		status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
+		if (retries > 100000000) {
+			AFU_MF_PMD_DEBUG("DMA queue full retry");
+			retries = 0;
+		} else {
+			retries++;
+		}
+		if (!status.desc_buf_full)
+			break;
+	}
+
+	blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
+		sizeof(*desc));
+}
+
+/*
+ * Queue the descriptor(s) for one DMA transfer of count bytes.
+ *
+ * FPGA_TO_FPGA transfers use a single descriptor. Transfers that involve
+ * host memory are split into up to three descriptors so that the bulk of
+ * the data moves in 4CL (CCIP_ALIGN_BYTES) aligned bursts: a short leading
+ * segment up to the next 4CL boundary, the aligned middle part and a short
+ * trailing remainder.
+ *
+ * When intr_en is set, only the final descriptor of the transfer requests
+ * a completion interrupt. is_last_desc == 0 enables early-done in the
+ * descriptor control word.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL or src, dst or count is
+ * not DMA aligned.
+ */
+static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	int count, int is_last_desc, fpga_dma_type type, int intr_en)
+{
+	msgdma_ext_desc *desc = NULL;
+	int alignment_offset = 0;
+	int segment_size = 0;
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* src, dst and count must be 64-byte aligned */
+	if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
+		!IS_DMA_ALIGNED(count))
+		return -EINVAL;
+	memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
+
+	/* these fields are fixed for all DMA transfers */
+	desc = ctx->desc_buf;
+	desc->seq_num = 0;
+	desc->wr_stride = 1;
+	desc->rd_stride = 1;
+	desc->control.go = 1;
+	if (intr_en)
+		desc->control.transfer_irq_en = 1;
+	else
+		desc->control.transfer_irq_en = 0;
+
+	if (!is_last_desc)
+		desc->control.early_done_en = 1;
+	else
+		desc->control.early_done_en = 0;
+
+	if (type == FPGA_TO_FPGA) {
+		desc->rd_address = src & DMA_MASK_32_BIT;
+		desc->wr_address = dst & DMA_MASK_32_BIT;
+		desc->len = count;
+		desc->wr_burst_count = 4;
+		desc->rd_burst_count = 4;
+		desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+		desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+		send_descriptor(ctx, desc);
+	} else {
+		/* check CCIP (host) address is aligned to 4CL (256B) */
+		alignment_offset = (type == HOST_TO_FPGA)
+			? (src % CCIP_ALIGN_BYTES) : (dst % CCIP_ALIGN_BYTES);
+		/* performing a short transfer to get aligned */
+		if (alignment_offset != 0) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->wr_burst_count = 1;
+			desc->rd_burst_count = 1;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			/* count isn't large enough to hit next 4CL boundary */
+			if ((CCIP_ALIGN_BYTES - alignment_offset) >= count) {
+				segment_size = count;
+				count = 0;
+			} else {
+				segment_size = CCIP_ALIGN_BYTES
+					- alignment_offset;
+				src += segment_size;
+				dst += segment_size;
+				count -= segment_size;
+				/* more descriptors follow: no interrupt yet */
+				desc->control.transfer_irq_en = 0;
+			}
+			/* post short transfer to align to a 4CL (256 byte) */
+			desc->len = segment_size;
+			send_descriptor(ctx, desc);
+		}
+		/* at this point we are 4CL (256 byte) aligned */
+		if (count >= CCIP_ALIGN_BYTES) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->wr_burst_count = 4;
+			desc->rd_burst_count = 4;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			/* buffer ends on 4CL boundary */
+			if ((count % CCIP_ALIGN_BYTES) == 0) {
+				segment_size = count;
+				count = 0;
+				/*
+				 * This is the final descriptor; re-arm the
+				 * interrupt in case the alignment segment
+				 * above switched it off.
+				 */
+				if (intr_en)
+					desc->control.transfer_irq_en = 1;
+			} else {
+				segment_size = count
+					- (count % CCIP_ALIGN_BYTES);
+				src += segment_size;
+				dst += segment_size;
+				count -= segment_size;
+				/* a remainder follows: no interrupt yet */
+				desc->control.transfer_irq_en = 0;
+			}
+			desc->len = segment_size;
+			send_descriptor(ctx, desc);
+		}
+		/* post short transfer to handle the remainder */
+		if (count > 0) {
+			desc->rd_address = src & DMA_MASK_32_BIT;
+			desc->wr_address = dst & DMA_MASK_32_BIT;
+			desc->len = count;
+			desc->wr_burst_count = 1;
+			desc->rd_burst_count = 1;
+			desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+			desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+			if (intr_en)
+				desc->control.transfer_irq_en = 1;
+			send_descriptor(ctx, desc);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Clear the host magic word and queue a 64-byte FPGA-to-host transfer from
+ * DMA_WF_MAGIC_ROM into it, with the completion interrupt enabled.
+ *
+ * Returns the result of do_dma(), or -EINVAL when ctx is NULL.
+ */
+static int issue_magic(struct dma_afu_ctx *ctx)
+{
+	/* ctx is dereferenced below, before do_dma() gets to validate it */
+	if (!ctx)
+		return -EINVAL;
+
+	*(ctx->magic_buf) = 0ULL;
+	return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
+		DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
+}
+
+/*
+ * Wait for the magic word to arrive in host memory.
+ *
+ * After waiting for the interrupt (whose result is not examined), the
+ * magic word is re-read a bounded number of times until it equals
+ * DMA_WF_MAGIC; an error is logged when it never does. The word is cleared
+ * again before returning. Does nothing when ctx is NULL.
+ */
+static void wait_magic(struct dma_afu_ctx *ctx)
+{
+	int spins;
+
+	if (ctx == NULL)
+		return;
+
+	poll_interrupt(ctx);
+
+	for (spins = 0; *(ctx->magic_buf) != DMA_WF_MAGIC; spins++) {
+		if (spins > 1000) {
+			AFU_MF_PMD_ERR("DMA magic operation timeout");
+			break;
+		}
+	}
+
+	*(ctx->magic_buf) = 0ULL;
+}
+
+/*
+ * Send chunk number `chunk` of a host-to-FPGA transfer.
+ *
+ * The chunk (ctx->dma_buf_size bytes at src/dst + chunk * size) is copied
+ * into bounce buffer chunk % NUM_DMA_BUF and queued for DMA. An interrupt
+ * is requested for the last chunk of each half of the buffer ring and for
+ * the last chunk overall; before such a chunk is queued, a previously
+ * requested interrupt is waited for. *intr_issued is set once an interrupt
+ * has been requested.
+ *
+ * Returns 0 on success, -EINVAL when ctx or intr_issued is NULL, or the
+ * error of poll_interrupt()/do_dma().
+ */
+static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	uint64_t chunk, int is_last_chunk, int *intr_issued)
+{
+	uint64_t slot;
+	int want_irq;
+	int ret;
+
+	if (ctx == NULL || intr_issued == NULL)
+		return -EINVAL;
+
+	src += chunk * ctx->dma_buf_size;
+	dst += chunk * ctx->dma_buf_size;
+
+	want_irq = ((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1)) ||
+		is_last_chunk;
+
+	/* drain the outstanding interrupt before requesting the next one */
+	if (want_irq && *intr_issued) {
+		ret = poll_interrupt(ctx);
+		if (ret)
+			return ret;
+	}
+
+	slot = chunk % NUM_DMA_BUF;
+	rte_memcpy(ctx->dma_buf[slot], (void *)(uintptr_t)src,
+		ctx->dma_buf_size);
+	ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[slot]),
+			ctx->dma_buf_size, 0, HOST_TO_FPGA, want_irq);
+	if (want_irq)
+		*intr_issued = 1;
+
+	return ret;
+}
+
+/*
+ * Copy count bytes from host memory (src) to FPGA memory (dst).
+ *
+ * Phases:
+ *  1. if dst is not DMA aligned, the bytes up to the next DMA_ALIGN_BYTES
+ *     boundary (or the whole transfer when it is shorter than that) go
+ *     through the ASE window;
+ *  2. whole chunks of ctx->dma_buf_size bytes are sent by DMA through the
+ *     bounce buffers;
+ *  3. the remaining multiple of DMA_ALIGN_BYTES is sent by one more DMA
+ *     from bounce buffer 0;
+ *  4. whatever is left goes through the ASE window again.
+ *
+ * Returns 0 on success, -EINVAL when ctx is NULL, or the error of the
+ * failing helper.
+ *
+ * NOTE(review): ctx->dma_buf_size is used as a divisor and is assumed to be
+ * non-zero - confirm it is validated where the context is set up.
+ */
+static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t aligned_addr = 0;
+	uint64_t align_bytes = 0;
+	uint64_t dma_chunks = 0;
+	uint64_t dma_tx_bytes = 0;
+	uint64_t offset = 0;
+	int issued_intr = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* phase 1: bring dst up to a DMA aligned address */
+	if (!IS_DMA_ALIGNED(dst)) {
+		if (count_left < DMA_ALIGN_BYTES)
+			return ase_host_to_fpga(ctx, &dst, &src, count_left);
+
+		aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
+			* DMA_ALIGN_BYTES;
+		align_bytes = aligned_addr - dst;
+		/* dst and src are advanced by the callee */
+		ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
+		if (ret)
+			return ret;
+		count_left = count_left - align_bytes;
+	}
+
+	if (count_left) {
+		/* phase 2: full bounce-buffer sized chunks */
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = dma_tx_buf(ctx, dst, src, i,
+				i == (dma_chunks - 1), &issued_intr);
+			if (ret)
+				return ret;
+		}
+
+		/* wait for the interrupt requested by the last chunk */
+		if (issued_intr) {
+			ret = poll_interrupt(ctx);
+			if (ret)
+				return ret;
+		}
+
+		if (count_left) {
+			/* phase 3: remaining whole DMA_ALIGN_BYTES units */
+			i = count_left / DMA_ALIGN_BYTES;
+			if (i > 0) {
+				dma_tx_bytes = i * DMA_ALIGN_BYTES;
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+					dma_tx_bytes);
+				rte_memcpy(ctx->dma_buf[0],
+					(void *)(uintptr_t)(src + offset),
+					dma_tx_bytes);
+				ret = do_dma(ctx, dst + offset,
+					DMA_HOST_ADDR(ctx->dma_iova[0]),
+					dma_tx_bytes, 1, HOST_TO_FPGA, 1);
+				if (ret)
+					return ret;
+				ret = poll_interrupt(ctx);
+				if (ret)
+					return ret;
+			}
+
+			/* phase 4: tail shorter than DMA_ALIGN_BYTES */
+			count_left -= dma_tx_bytes;
+			if (count_left) {
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+					count_left);
+				dst += offset + dma_tx_bytes;
+				src += offset + dma_tx_bytes;
+				ret = ase_host_to_fpga(ctx, &dst, &src,
+					count_left);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Queue one buffer-sized FPGA-to-host chunk and drain completed chunks.
+ *
+ * Chunk number `chunk` is DMAed into bounce buffer (chunk % NUM_DMA_BUF).
+ * *rx_count is the number of chunks already copied out to dst, so
+ * (chunk - *rx_count + 1) chunks are pending after this call queues its
+ * descriptor. When HALF_DMA_BUF chunks are pending a magic write is issued;
+ * when more than NUM_DMA_BUF - 1 chunks are pending, or on the last chunk,
+ * an outstanding magic write is waited for, HALF_DMA_BUF buffers are copied
+ * to dst (advancing *rx_count) and a new magic write is issued.
+ *
+ * Returns -EINVAL on a NULL ctx, rx_count or wf_issued, otherwise 0 or the
+ * error from do_dma()/issue_magic().
+ */
+static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
+{
+	uint64_t i = chunk % NUM_DMA_BUF;
+	uint64_t n = 0;
+	uint64_t num_pending = 0;
+	int ret = 0;
+
+	/* validate every pointer before the first dereference */
+	if (!ctx || !rx_count || !wf_issued)
+		return -EINVAL;
+
+	n = *rx_count;
+
+	ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
+		src + chunk * ctx->dma_buf_size,
+		ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
+	if (ret)
+		return ret;
+
+	num_pending = chunk - n + 1;
+	if (num_pending == HALF_DMA_BUF) {
+		ret = issue_magic(ctx);
+		if (ret) {
+			AFU_MF_PMD_DEBUG("Magic issue failed");
+			return ret;
+		}
+		*wf_issued = 1;
+	}
+
+	if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
+		if (*wf_issued) {
+			wait_magic(ctx);
+			/* copy out the half of the buffers queued before the magic write */
+			for (i = 0; i < HALF_DMA_BUF; i++) {
+				rte_memcpy((void *)(uintptr_t)(dst +
+						n * ctx->dma_buf_size),
+					ctx->dma_buf[n % NUM_DMA_BUF],
+					ctx->dma_buf_size);
+				n++;
+			}
+			*wf_issued = 0;
+			*rx_count = n;
+		}
+		ret = issue_magic(ctx);
+		if (ret) {
+			AFU_MF_PMD_DEBUG("Magic issue failed");
+			return ret;
+		}
+		*wf_issued = 1;
+	}
+
+	return ret;
+}
+
+/*
+ * Copy count bytes from FPGA address src to host virtual address dst.
+ *
+ * Mirrors dma_host_to_fpga():
+ *  1. an unaligned head (up to the next DMA_ALIGN_BYTES boundary of src)
+ *     is read through ase_fpga_to_host();
+ *  2. whole chunks of ctx->dma_buf_size bytes are read with dma_rx_buf(),
+ *     which copies completed bounce buffers to dst as it goes;
+ *  3. bounce buffers not yet copied out are flushed to dst;
+ *  4. a remaining multiple of DMA_ALIGN_BYTES is read with one do_dma()
+ *     into bounce buffer 0, followed by a magic write and wait;
+ *  5. the final sub-alignment tail is read through ase_fpga_to_host().
+ *
+ * Returns -EINVAL when ctx is NULL, otherwise 0 or the first error reported
+ * by one of the helpers.
+ */
+static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t aligned_addr = 0;
+	uint64_t align_bytes = 0;
+	uint64_t dma_chunks = 0;
+	uint64_t pending_buf = 0;
+	uint64_t dma_rx_bytes = 0;
+	uint64_t offset = 0;
+	int wf_issued = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	if (!IS_DMA_ALIGNED(src)) {
+		/* too small to ever reach an aligned address: ASE only */
+		if (count_left < DMA_ALIGN_BYTES)
+			return ase_fpga_to_host(ctx, &src, &dst, count_left);
+
+		aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
+			 * DMA_ALIGN_BYTES;
+		align_bytes = aligned_addr - src;
+		/*
+		 * NOTE(review): src/dst are passed by pointer and src is
+		 * treated as aligned afterwards, so the helper presumably
+		 * advances both by align_bytes - confirm.
+		 */
+		ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
+		if (ret)
+			return ret;
+		count_left = count_left - align_bytes;
+	}
+
+	if (count_left) {
+		dma_chunks = count_left / ctx->dma_buf_size;
+		/* offset = bytes covered by the full-size chunks */
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		/* pending_buf counts the chunks dma_rx_buf() already copied to dst */
+		for (i = 0; i < dma_chunks; i++) {
+			ret = dma_rx_buf(ctx, dst, src, i,
+				i == (dma_chunks - 1),
+				&pending_buf, &wf_issued);
+			if (ret)
+				return ret;
+		}
+
+		/* wait for the last magic write left outstanding by dma_rx_buf() */
+		if (wf_issued)
+			wait_magic(ctx);
+
+		/* clear out final dma memcpy operations */
+		while (pending_buf < dma_chunks) {
+			/* constant size transfer; no length check required */
+			rte_memcpy((void *)(uintptr_t)(dst +
+					pending_buf * ctx->dma_buf_size),
+				ctx->dma_buf[pending_buf % NUM_DMA_BUF],
+				ctx->dma_buf_size);
+			pending_buf++;
+		}
+
+		if (count_left > 0) {
+			/* i = number of whole alignment units still left */
+			i = count_left / DMA_ALIGN_BYTES;
+			if (i > 0) {
+				dma_rx_bytes = i * DMA_ALIGN_BYTES;
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+					dma_rx_bytes);
+				ret = do_dma(ctx,
+					DMA_HOST_ADDR(ctx->dma_iova[0]),
+					src + offset,
+					dma_rx_bytes, 1, FPGA_TO_HOST, 0);
+				if (ret)
+					return ret;
+				/* the magic write signals that the data above has landed */
+				ret = issue_magic(ctx);
+				if (ret)
+					return ret;
+				wait_magic(ctx);
+				rte_memcpy((void *)(uintptr_t)(dst + offset),
+					ctx->dma_buf[0], dma_rx_bytes);
+			}
+
+			/* whatever is smaller than one alignment unit goes via ASE */
+			count_left -= dma_rx_bytes;
+			if (count_left) {
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+					count_left);
+				dst += offset + dma_rx_bytes;
+				src += offset + dma_rx_bytes;
+				ret = ase_fpga_to_host(ctx, &src, &dst,
+							count_left);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Copy count bytes between two FPGA addresses.
+ *
+ * When dst, src and count are all DMA aligned the copy is done by the DMA
+ * engine directly: buffer-sized FPGA_TO_FPGA descriptors, with a magic
+ * write/wait after every NUM_DMA_BUF descriptors and after the last one,
+ * followed by one descriptor for the remainder.
+ *
+ * Otherwise the data is bounced through a temporary host buffer, one
+ * ctx->dma_buf_size piece at a time, using dma_fpga_to_host() and
+ * dma_host_to_fpga(). This path rejects a destination that starts inside
+ * the source range.
+ *
+ * Returns -EINVAL on a NULL ctx or an overlapping range, -ENOMEM when the
+ * bounce buffer cannot be allocated, otherwise 0 or the first helper error.
+ */
+static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t dma_chunks = 0;
+	uint64_t offset = 0;
+	uint32_t tx_chunks = 0;
+	uint64_t *tmp_buf = NULL;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
+	    && IS_DMA_ALIGNED(count_left)) {
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
+				src + i * ctx->dma_buf_size,
+				ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
+			if (ret)
+				return ret;
+			/* fence after each batch of descriptors and at the end */
+			if ((((i + 1) % NUM_DMA_BUF) == 0) ||
+				(i == (dma_chunks - 1))) {
+				ret = issue_magic(ctx);
+				if (ret)
+					return ret;
+				wait_magic(ctx);
+			}
+		}
+
+		if (count_left > 0) {
+			AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA", count_left);
+			ret = do_dma(ctx, dst + offset, src + offset,
+				count_left, 1, FPGA_TO_FPGA, 0);
+			if (ret)
+				return ret;
+			ret = issue_magic(ctx);
+			if (ret)
+				return ret;
+			wait_magic(ctx);
+		}
+	} else {
+		if ((src < dst) && (src + count_left > dst)) {
+			AFU_MF_PMD_ERR("Overlapping: 0x%"PRIx64
+				" -> 0x%"PRIx64" (0x%"PRIx64")",
+				src, dst, count_left);
+			return -EINVAL;
+		}
+		tx_chunks = count_left / ctx->dma_buf_size;
+		offset = tx_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
+			" (%u...0x%"PRIx64")",
+			src, dst, tx_chunks, count_left);
+		tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
+			DMA_ALIGN_BYTES);
+		/* the helpers below memcpy into this buffer, so it must exist */
+		if (!tmp_buf) {
+			AFU_MF_PMD_ERR("Failed to allocate bounce buffer");
+			return -ENOMEM;
+		}
+		for (i = 0; i < tx_chunks; i++) {
+			ret = dma_fpga_to_host(ctx, (uint64_t)(uintptr_t)tmp_buf,
+				src + i * ctx->dma_buf_size,
+				ctx->dma_buf_size);
+			if (ret)
+				goto free_buf;
+			ret = dma_host_to_fpga(ctx,
+				dst + i * ctx->dma_buf_size,
+				(uint64_t)(uintptr_t)tmp_buf, ctx->dma_buf_size);
+			if (ret)
+				goto free_buf;
+		}
+
+		if (count_left > 0) {
+			ret = dma_fpga_to_host(ctx, (uint64_t)(uintptr_t)tmp_buf,
+				src + offset, count_left);
+			if (ret)
+				goto free_buf;
+			ret = dma_host_to_fpga(ctx, dst + offset,
+				(uint64_t)(uintptr_t)tmp_buf, count_left);
+			if (ret)
+				goto free_buf;
+		}
+free_buf:
+		rte_free(tmp_buf);
+	}
+
+	return ret;
+}
+
+/*
+ * Dispatch a blocking transfer of count bytes from src to dst to the routine
+ * matching the requested direction.
+ * Returns -EINVAL on a NULL ctx or an unknown direction, otherwise the
+ * result of the selected routine.
+ */
+static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
+	uint64_t src, size_t count, fpga_dma_type type)
+{
+	if (ctx == NULL)
+		return -EINVAL;
+
+	switch (type) {
+	case HOST_TO_FPGA:
+		return dma_host_to_fpga(ctx, dst, src, count);
+	case FPGA_TO_HOST:
+		return dma_fpga_to_host(ctx, dst, src, count);
+	case FPGA_TO_FPGA:
+		return dma_fpga_to_fpga(ctx, dst, src, count);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Return the interval from start to end in seconds.
+ *
+ * The difference is accumulated in 64-bit signed nanoseconds so it cannot
+ * overflow where long/time_t are 32-bit, and an end earlier than start
+ * yields a negative interval rather than a huge unsigned one.
+ */
+static double getTime(struct timespec start, struct timespec end)
+{
+	int64_t sec = (int64_t)end.tv_sec - (int64_t)start.tv_sec;
+	int64_t nsec = (int64_t)end.tv_nsec - (int64_t)start.tv_nsec;
+
+	return (double)(sec * 1000000000LL + nsec) / 1e9;
+}
+
+/* number of timed repetitions per direction in sweep_test() */
+#define SWEEP_ITERS 1
+/*
+ * Time a host-to-FPGA and an FPGA-to-host transfer and verify the result.
+ *
+ * The host buffer starts buf_offset bytes into ctx->data_buf and the
+ * transfer size is length - (buf_offset + size_decrement), which lets the
+ * caller exercise unaligned addresses and sizes. Bandwidth for each
+ * direction is printed to stdout.
+ *
+ * Returns -EINVAL when the buffers are missing, the parameters exceed
+ * length, or the range leaves the DDR space; a transfer error; or the
+ * result of dma_afu_buf_verify().
+ */
+static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
+	uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
+{
+	struct timespec t0, t1;
+	uint64_t trim = buf_offset + size_decrement;
+	uint64_t nbytes = 0;
+	uint64_t *host_buf = NULL;
+	double elapsed = 0.0;
+	double throughput;
+	int iter;
+	int rc = 0;
+
+	if (!ctx || !ctx->data_buf || !ctx->ref_buf) {
+		AFU_MF_PMD_ERR("Buffer for DMA test is not allocated");
+		return -EINVAL;
+	}
+
+	if (length < trim) {
+		AFU_MF_PMD_ERR("Test length does not match unaligned parameter");
+		return -EINVAL;
+	}
+
+	nbytes = length - trim;
+	if ((ddr_offset + nbytes) > ctx->mem_size) {
+		AFU_MF_PMD_ERR("Test is out of DDR memory space");
+		return -EINVAL;
+	}
+
+	host_buf = (uint64_t *)((uint8_t *)ctx->data_buf + buf_offset);
+
+	/* host -> FPGA */
+	printf("Sweep Host %p to FPGA 0x%"PRIx64
+		" with 0x%"PRIx64" bytes ...\n",
+		(void *)host_buf, ddr_offset, nbytes);
+
+	for (iter = 0; iter < SWEEP_ITERS; iter++) {
+		clock_gettime(CLOCK_MONOTONIC, &t0);
+		rc = dma_transfer_sync(ctx, ddr_offset, (uint64_t)host_buf,
+			nbytes, HOST_TO_FPGA);
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		if (rc) {
+			AFU_MF_PMD_ERR("Failed");
+			return rc;
+		}
+		elapsed += getTime(t0, t1);
+	}
+	throughput = (nbytes * SWEEP_ITERS) / (elapsed * 1000000);
+	printf("Measured bandwidth = %lf MB/s\n", throughput);
+
+	/* FPGA -> host, into a zeroed buffer so stale data cannot pass */
+	printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n",
+		ddr_offset, (void *)host_buf, nbytes);
+
+	elapsed = 0.0;
+	memset((char *)host_buf, 0, nbytes);
+	for (iter = 0; iter < SWEEP_ITERS; iter++) {
+		clock_gettime(CLOCK_MONOTONIC, &t0);
+		rc = dma_transfer_sync(ctx, (uint64_t)host_buf, ddr_offset,
+			nbytes, FPGA_TO_HOST);
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		if (rc) {
+			AFU_MF_PMD_ERR("Failed");
+			return rc;
+		}
+		elapsed += getTime(t0, t1);
+	}
+	throughput = (nbytes * SWEEP_ITERS) / (elapsed * 1000000);
+	printf("Measured bandwidth = %lf MB/s\n", throughput);
+
+	printf("Verifying buffer ...\n");
+	return dma_afu_buf_verify(ctx, nbytes);
+}
+
+/*
+ * Run the DMA self test described by the stored DMA configuration.
+ *
+ * Steps, each followed by a buffer verification where data returns to the
+ * host: host -> FPGA, FPGA -> host, FPGA -> FPGA (only when a second,
+ * non-overlapping region fits in device memory), FPGA -> host from the copy,
+ * an aligned sweep and, if cfg->unaligned is set, a series of unaligned
+ * sweeps. The DMA global interrupt is enabled for the duration of the test
+ * and the test buffers are freed on exit.
+ *
+ * Returns -EINVAL for a NULL dev or out-of-range DMA index, -ENOENT when the
+ * device has no private data, otherwise 0 or the first failure code.
+ */
+static int dma_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	struct rte_pmd_afu_dma_cfg *cfg = NULL;
+	msgdma_ctrl ctrl;
+	uint64_t offset = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->dma_cfg;
+	if (cfg->index >= NUM_N3000_DMA)
+		return -EINVAL;
+	ctx = &priv->dma_ctx[cfg->index];
+
+	ctx->pattern = (int)cfg->pattern;
+	ctx->verbose = (int)cfg->verbose;
+	ctx->dma_buf_size = cfg->size;
+
+	ret = dma_afu_buf_alloc(ctx, cfg);
+	if (ret)
+		goto free;
+
+	printf("Initialize test buffer\n");
+	dma_afu_buf_init(ctx, cfg->length);
+
+	/* enable interrupt */
+	ctrl.csr = 0;
+	ctrl.global_intr_en_mask = 1;
+	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+	printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf,
+		cfg->offset, cfg->length);
+	ret = dma_transfer_sync(ctx, cfg->offset, (uint64_t)ctx->data_buf,
+		cfg->length, HOST_TO_FPGA);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from host to FPGA");
+		goto end;
+	}
+	/* wipe the host copy so the read-back below proves the round trip */
+	memset(ctx->data_buf, 0, cfg->length);
+
+	printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
+		ctx->data_buf, cfg->length);
+	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, cfg->offset,
+		cfg->length, FPGA_TO_HOST);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+		goto end;
+	}
+	ret = dma_afu_buf_verify(ctx, cfg->length);
+	if (ret)
+		goto end;
+
+	/*
+	 * Pick the target of the FPGA-to-FPGA copy: right behind the source
+	 * region if it fits, else the start of memory if the source lies
+	 * beyond it, else skip the remaining steps. offset and length are
+	 * printed as 32-bit values here while mem_size is compared as 64-bit,
+	 * so widen before adding to keep the sums from wrapping.
+	 */
+	if (((uint64_t)cfg->offset + (uint64_t)cfg->length * 2) <=
+		ctx->mem_size)
+		offset = (uint64_t)cfg->offset + cfg->length;
+	else if (cfg->offset > cfg->length)
+		offset = 0;
+	else
+		goto end;
+
+	printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
+		cfg->offset, offset, cfg->length);
+	ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length,
+		FPGA_TO_FPGA);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to FPGA");
+		goto end;
+	}
+
+	printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
+		ctx->data_buf, cfg->length);
+	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, offset,
+		cfg->length, FPGA_TO_HOST);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+		goto end;
+	}
+	ret = dma_afu_buf_verify(ctx, cfg->length);
+	if (ret)
+		goto end;
+
+	printf("Sweep with aligned address and size\n");
+	ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
+	if (ret)
+		goto end;
+
+	if (cfg->unaligned) {
+		printf("Sweep with unaligned address and size\n");
+		/* {host buffer offset, bytes trimmed from the size} pairs */
+		struct unaligned_set {
+			uint64_t addr_offset;
+			uint64_t size_dec;
+		} param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
+		for (i = 0; i < ARRAY_SIZE(param); i++) {
+			ret = sweep_test(ctx, cfg->length, cfg->offset,
+				param[i].addr_offset, param[i].size_dec);
+			if (ret)
+				break;
+		}
+	}
+
+end:
+	/* disable interrupt */
+	ctrl.global_intr_en_mask = 0;
+	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+free:
+	dma_afu_buf_free(ctx);
+	return ret;
+}
+
+/*
+ * Resolve the PCI device behind an AFU raw device: the raw device's generic
+ * device is converted to its AFU device, whose own raw device's generic
+ * device is converted to the PCI device.
+ * Returns NULL if any link in that chain is missing.
+ */
+static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_mf_rawdev *dev)
+{
+	struct rte_afu_device *afu = NULL;
+
+	if (dev == NULL)
+		return NULL;
+	if (dev->rawdev == NULL || dev->rawdev->device == NULL)
+		return NULL;
+
+	afu = RTE_DEV_TO_AFU(dev->rawdev->device);
+	if (afu->rawdev != NULL && afu->rawdev->device != NULL)
+		return RTE_DEV_TO_PCI(afu->rawdev->device);
+
+	return NULL;
+}
+
+#ifdef VFIO_PRESENT
+/*
+ * Bind count eventfds to consecutive MSI-X vectors starting at vec_start
+ * through the VFIO_DEVICE_SET_IRQS ioctl of the underlying PCI device.
+ *
+ * Returns -EINVAL for bad arguments (count must be 1..MAX_MSIX_VEC),
+ * -ENODEV when the PCI device or its VFIO device fd is unavailable,
+ * -ENOMEM on allocation failure, otherwise the ioctl() return value.
+ */
+static int dma_afu_set_irqs(struct afu_mf_rawdev *dev, uint32_t vec_start,
+	uint32_t count, int *efds)
+{
+	struct rte_pci_device *pci_dev = NULL;
+	struct vfio_irq_set *irq_set = NULL;
+	int vfio_dev_fd = 0;
+	size_t sz = 0;
+	int ret = 0;
+
+	if (!dev || !efds || (count == 0) || (count > MAX_MSIX_VEC))
+		return -EINVAL;
+
+	pci_dev = n3000_afu_get_pci_dev(dev);
+	if (!pci_dev)
+		return -ENODEV;
+	vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
+	/* a negative value is an error code, not a usable descriptor */
+	if (vfio_dev_fd < 0)
+		return -ENODEV;
+
+	/* the eventfd array is carried in the trailing data of vfio_irq_set */
+	sz = sizeof(*irq_set) + sizeof(*efds) * count;
+	irq_set = rte_zmalloc(NULL, sz, 0);
+	if (!irq_set)
+		return -ENOMEM;
+
+	irq_set->argsz = (uint32_t)sz;
+	irq_set->count = count;
+	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+		VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	irq_set->start = vec_start;
+
+	rte_memcpy(&irq_set->data, efds, sizeof(*efds) * count);
+	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+	if (ret)
+		AFU_MF_PMD_ERR("Error enabling MSI-X interrupts");
+
+	rte_free(irq_set);
+	return ret;
+}
+#endif
+
+/*
+ * Locate the register block of the FIU port this device sits on.
+ *
+ * The port attribute register for dev->port is read from BAR 0; it tells
+ * whether the port is implemented and in which BAR, at which offset, its
+ * registers live.
+ *
+ * Returns the mapped address of the port registers, or NULL when the PCI
+ * device cannot be resolved, a required BAR is not mapped, the port is not
+ * implemented, or the reported BAR index is out of range.
+ */
+static void *n3000_afu_get_port_addr(struct afu_mf_rawdev *dev)
+{
+	struct rte_pci_device *pci_dev = NULL;
+	uint8_t *addr = NULL;
+	uint64_t val = 0;
+	uint32_t bar = 0;
+
+	pci_dev = n3000_afu_get_pci_dev(dev);
+	if (!pci_dev)
+		return NULL;
+
+	addr = (uint8_t *)pci_dev->mem_resource[0].addr;
+	/* never issue an MMIO read relative to an unmapped BAR */
+	if (!addr) {
+		AFU_MF_PMD_ERR("BAR 0 is not mapped");
+		return NULL;
+	}
+	val = rte_read64(addr + PORT_ATTR_REG(dev->port));
+	if (!PORT_IMPLEMENTED(val)) {
+		AFU_MF_PMD_INFO("FIU port %d is not implemented", dev->port);
+		return NULL;
+	}
+
+	bar = PORT_BAR(val);
+	if (bar >= PCI_MAX_RESOURCE) {
+		AFU_MF_PMD_ERR("BAR index %u is out of limit", bar);
+		return NULL;
+	}
+
+	/* NULL + offset would look like a valid pointer to the caller */
+	if (!pci_dev->mem_resource[bar].addr) {
+		AFU_MF_PMD_ERR("BAR %u is not mapped", bar);
+		return NULL;
+	}
+
+	addr = (uint8_t *)pci_dev->mem_resource[bar].addr + PORT_OFFSET(val);
+	return addr;
+}
+
+/*
+ * Walk the port's feature header list looking for the private feature with
+ * id PORT_FEATURE_UINT_ID and report the values decoded from its capability
+ * register.
+ *
+ * vec_start and vec_count are optional outputs (either may be NULL).
+ * Returns 0 when the feature is found, -ENOENT when the port address cannot
+ * be resolved or the list ends without a match.
+ */
+static int n3000_afu_get_irq_capability(struct afu_mf_rawdev *dev,
+	uint32_t *vec_start, uint32_t *vec_count)
+{
+	uint8_t *dfh = NULL;
+	uint64_t hdr = 0;
+	uint64_t cap = 0;
+	uint64_t step = 0;
+
+	dfh = (uint8_t *)n3000_afu_get_port_addr(dev);
+	if (dfh == NULL)
+		return -ENOENT;
+
+	for (;;) {
+		hdr = rte_read64(dfh);
+		if ((DFH_TYPE(hdr) == DFH_TYPE_PRIVATE) &&
+			(DFH_FEATURE_ID(hdr) == PORT_FEATURE_UINT_ID)) {
+			cap = rte_read64(dfh + PORT_UINT_CAP_REG);
+			if (vec_start != NULL)
+				*vec_start = PORT_VEC_START(cap);
+			if (vec_count != NULL)
+				*vec_count = PORT_VEC_COUNT(cap);
+			return 0;
+		}
+
+		/* stop on an invalid link or on the end-of-list marker */
+		step = DFH_NEXT_OFFSET(hdr);
+		if (((step & 0xffff) == 0xffff) || (step == 0))
+			break;
+		if (DFH_EOL(hdr))
+			break;
+		dfh += step;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Free the DSM, source and destination buffers of the NLB context and reset
+ * the pointers that referred to them.
+ * Returns -EINVAL on a NULL dev, -ENOENT when there is no private data,
+ * otherwise 0.
+ */
+static int nlb_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct nlb_afu_ctx *nlb = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	nlb = &priv->nlb_ctx;
+
+	/* status_ptr is cleared together with the DSM buffer */
+	rte_free(nlb->dsm_ptr);
+	rte_free(nlb->src_ptr);
+	rte_free(nlb->dest_ptr);
+
+	nlb->dsm_ptr = NULL;
+	nlb->status_ptr = NULL;
+	nlb->src_ptr = NULL;
+	nlb->dest_ptr = NULL;
+
+	return 0;
+}
+
+/*
+ * Set up the NLB context: record the feature address and allocate the DSM,
+ * source and destination buffers together with their IOVAs.
+ *
+ * Returns -EINVAL on NULL arguments, -ENOENT when there is no private data,
+ * -ENOMEM when an allocation or IOVA lookup fails (everything allocated so
+ * far is released), otherwise 0.
+ */
+static int nlb_afu_ctx_init(struct afu_mf_rawdev *dev, uint8_t *addr)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct nlb_afu_ctx *nlb = NULL;
+
+	if (dev == NULL || addr == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	nlb = &priv->nlb_ctx;
+	nlb->addr = addr;
+
+	nlb->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+	if (nlb->dsm_ptr == NULL)
+		goto nomem;
+	nlb->dsm_iova = rte_malloc_virt2iova(nlb->dsm_ptr);
+	if (nlb->dsm_iova == RTE_BAD_IOVA)
+		goto nomem;
+
+	nlb->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (nlb->src_ptr == NULL)
+		goto nomem;
+	nlb->src_iova = rte_malloc_virt2iova(nlb->src_ptr);
+	if (nlb->src_iova == RTE_BAD_IOVA)
+		goto nomem;
+
+	nlb->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (nlb->dest_ptr == NULL)
+		goto nomem;
+	nlb->dest_iova = rte_malloc_virt2iova(nlb->dest_ptr);
+	if (nlb->dest_iova == RTE_BAD_IOVA)
+		goto nomem;
+
+	/* the status record lives DSM_STATUS bytes into the DSM buffer */
+	nlb->status_ptr = (struct nlb_dsm_status *)(nlb->dsm_ptr + DSM_STATUS);
+	return 0;
+
+nomem:
+	nlb_afu_ctx_release(dev);
+	return -ENOMEM;
+}
+
+/*
+ * Release the resources of every DMA context of the device.
+ *
+ * dma_afu_ctx_init() allocates a descriptor and a magic buffer per context,
+ * so all NUM_N3000_DMA contexts are freed here, not just the first one.
+ * The event fd is closed once, using the value stored in context 0, and only
+ * if it is positive: the private data is zero-allocated, so 0 means no fd
+ * was ever stored and closing it would close an unrelated descriptor.
+ *
+ * Returns -EINVAL on a NULL dev, -ENOENT when there is no private data,
+ * otherwise 0.
+ */
+static int dma_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	int event_fd = 0;
+	int i = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	event_fd = priv->dma_ctx[0].event_fd;
+
+	for (i = 0; i < NUM_N3000_DMA; i++) {
+		ctx = &priv->dma_ctx[i];
+
+		rte_free(ctx->desc_buf);
+		ctx->desc_buf = NULL;
+
+		rte_free(ctx->magic_buf);
+		ctx->magic_buf = NULL;
+
+		/* forget the fd so a second release cannot close it again */
+		ctx->event_fd = 0;
+	}
+
+	if (event_fd > 0)
+		close(event_fd);
+	return 0;
+}
+
+/*
+ * Initialise DMA context `index` for the DMA feature found at addr.
+ *
+ * Register sub-block addresses and the memory size for this index are
+ * recorded. For index 0 the interrupt vector is looked up, an eventfd is
+ * created and (with VFIO) bound to that vector; every context stores the
+ * most recently created eventfd. Finally the descriptor buffer and the
+ * magic buffer with its IOVA are allocated.
+ *
+ * Returns -EINVAL on bad arguments, -ENOENT when there is no private data,
+ * the error of n3000_afu_get_irq_capability(), -EBADF when eventfd() fails,
+ * -ENOMEM on allocation/IOVA failure (after calling dma_afu_ctx_release()),
+ * otherwise 0.
+ */
+static int dma_afu_ctx_init(struct afu_mf_rawdev *dev, int index, uint8_t *addr)
+{
+	static int efds[1] = {0};
+	uint64_t ddr_size[] = {0x100000000, 0x100000000, 0x40000000, 0x1000000};
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *dma = NULL;
+	uint32_t first_vec = 0;
+	int rc = 0;
+
+	if (dev == NULL || addr == NULL)
+		return -EINVAL;
+	if ((index < 0) || (index >= NUM_N3000_DMA))
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	dma = &priv->dma_ctx[index];
+	dma->index = index;
+	dma->mem_size = ddr_size[dma->index];
+	dma->cur_ase_page = INVALID_ASE_PAGE;
+	dma->addr = addr;
+	dma->csr_addr = addr + DMA_CSR;
+	dma->desc_addr = addr + DMA_DESC;
+	dma->ase_ctrl_addr = addr + DMA_ASE_CTRL;
+	dma->ase_data_addr = addr + DMA_ASE_DATA;
+
+	if (dma->index == 0) {
+		rc = n3000_afu_get_irq_capability(dev, &first_vec, NULL);
+		if (rc)
+			return rc;
+
+		efds[0] = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+		if (efds[0] < 0) {
+			AFU_MF_PMD_ERR("eventfd create failed");
+			return -EBADF;
+		}
+#ifdef VFIO_PRESENT
+		/* a failed interrupt setup is only logged, not fatal */
+		if (dma_afu_set_irqs(dev, first_vec, 1, efds))
+			AFU_MF_PMD_ERR("DMA interrupt setup failed");
+#endif
+	}
+	dma->event_fd = efds[0];
+
+	/* every failure from here on is an allocation failure */
+	rc = -ENOMEM;
+
+	dma->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
+		sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
+	if (dma->desc_buf == NULL)
+		goto release;
+
+	dma->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (dma->magic_buf == NULL)
+		goto release;
+
+	dma->magic_iova = rte_malloc_virt2iova(dma->magic_buf);
+	if (dma->magic_iova == RTE_BAD_IOVA)
+		goto release;
+
+	return 0;
+
+release:
+	dma_afu_ctx_release(dev);
+	return rc;
+}
+
+/*
+ * Walk the feature header list starting at dev->addr and initialise a
+ * context for each recognised feature: the NLB0 AFU and up to NUM_N3000_DMA
+ * DMA blocks, matched by header type and UUID. Unrecognised headers are
+ * only logged at debug level.
+ *
+ * Returns -EINVAL on a NULL dev, -ENOENT when there is no private data, the
+ * error of a failing context initialiser, otherwise 0.
+ */
+static int n3000_afu_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	uint8_t *dfh = NULL;
+	uint64_t hdr = 0;
+	uint64_t guid_h = 0;
+	uint64_t guid_l = 0;
+	uint64_t step = 0;
+	int rc = 0;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	for (dfh = (uint8_t *)dev->addr; ; dfh += step) {
+		hdr = rte_read64(dfh);
+		guid_l = rte_read64(dfh + DFH_UUID_L_OFFSET);
+		guid_h = rte_read64(dfh + DFH_UUID_H_OFFSET);
+
+		if ((DFH_TYPE(hdr) == DFH_TYPE_AFU) &&
+			(guid_l == N3000_NLB0_UUID_L) &&
+			(guid_h == N3000_NLB0_UUID_H)) {
+			AFU_MF_PMD_INFO("AFU NLB0 found @ %p", (void *)dfh);
+			rc = nlb_afu_ctx_init(dev, dfh);
+			if (rc)
+				return rc;
+		} else if ((DFH_TYPE(hdr) == DFH_TYPE_BBB) &&
+			(guid_l == N3000_DMA_UUID_L) &&
+			(guid_h == N3000_DMA_UUID_H) &&
+			(priv->num_dma < NUM_N3000_DMA)) {
+			AFU_MF_PMD_INFO("AFU DMA%d found @ %p",
+				priv->num_dma, (void *)dfh);
+			rc = dma_afu_ctx_init(dev, priv->num_dma, dfh);
+			if (rc)
+				return rc;
+			priv->num_dma++;
+		} else {
+			AFU_MF_PMD_DEBUG("DFH: type %"PRIu64
+				", uuid %016"PRIx64"%016"PRIx64,
+				DFH_TYPE(hdr), guid_h, guid_l);
+		}
+
+		/* stop on an invalid link or on the end-of-list marker */
+		step = DFH_NEXT_OFFSET(hdr);
+		if (((step & 0xffff) == 0xffff) || (step == 0))
+			break;
+		if (DFH_EOL(hdr))
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Driver init hook: allocate the zeroed private data if the device has none
+ * yet, then discover and initialise the AFU contexts.
+ * Returns -EINVAL on a NULL dev, -ENOMEM on allocation failure, otherwise
+ * the result of n3000_afu_ctx_init().
+ */
+static int n3000_afu_init(struct afu_mf_rawdev *dev)
+{
+	void *priv = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->priv == NULL) {
+		priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0);
+		if (priv == NULL)
+			return -ENOMEM;
+		dev->priv = priv;
+	}
+
+	return n3000_afu_ctx_init(dev);
+}
+
+/*
+ * Driver config hook: validate a struct rte_pmd_afu_n3000_cfg and store the
+ * NLB or DMA part of it in the private data for a later test run.
+ *
+ * NLB: mode must be loopback and every selector/cache-line count must be
+ * within its allowed range. DMA: the index must be valid and the
+ * [offset, offset + length) window must be non-empty and lie inside the
+ * memory attached to that DMA; for DMA3 the length is rounded down to a
+ * multiple of 64 bytes in the caller's structure as well.
+ *
+ * Returns -EINVAL on NULL/invalid arguments or an unknown type, -ENOENT
+ * when there is no private data, otherwise 0.
+ */
+static int n3000_afu_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_n3000_cfg *cfg = NULL;
+	int i = 0;
+	uint64_t top = 0;
+
+	if (!dev || !config || !config_size)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
+		return -EINVAL;
+
+	cfg = (struct rte_pmd_afu_n3000_cfg *)config;
+	if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
+		if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
+			return -EINVAL;
+		if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
+			(cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
+			return -EINVAL;
+		if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
+			return -EINVAL;
+		if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
+			return -EINVAL;
+		if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
+			return -EINVAL;
+		if ((cfg->nlb_cfg.multi_cl != 1) &&
+			(cfg->nlb_cfg.multi_cl != 2) &&
+			(cfg->nlb_cfg.multi_cl != 4))
+			return -EINVAL;
+		if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
+			(cfg->nlb_cfg.begin > MAX_CACHE_LINES))
+			return -EINVAL;
+		if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
+			(cfg->nlb_cfg.end > MAX_CACHE_LINES))
+			return -EINVAL;
+		rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
+			sizeof(struct rte_pmd_afu_nlb_cfg));
+	} else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
+		if (cfg->dma_cfg.index >= NUM_N3000_DMA)
+			return -EINVAL;
+		i = cfg->dma_cfg.index;
+		if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
+			return -EINVAL;
+		if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
+			return -EINVAL;
+		/*
+		 * Widen before adding: a sum computed in a narrower type could
+		 * wrap and slip past the upper-bound check below.
+		 */
+		top = (uint64_t)cfg->dma_cfg.length + cfg->dma_cfg.offset;
+		if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
+			return -EINVAL;
+		if (i == 3) {  /* QDR connected to DMA3 */
+			if (cfg->dma_cfg.length & 0x3f) {
+				cfg->dma_cfg.length &= ~0x3f;
+				AFU_MF_PMD_INFO("Round size to %x for QDR",
+					cfg->dma_cfg.length);
+			}
+		}
+		rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
+			sizeof(struct rte_pmd_afu_dma_cfg));
+	} else {
+		AFU_MF_PMD_ERR("Invalid type of N3000 AFU");
+		return -EINVAL;
+	}
+
+	priv->cfg_type = cfg->type;
+	return 0;
+}
+
+/*
+ * Driver test hook: run the NLB or DMA self test selected by the last
+ * successful configuration.
+ * Returns -EINVAL on a NULL dev or when nothing has been configured,
+ * -ENOENT when there is no private data, otherwise the test result.
+ */
+static int n3000_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
+		AFU_MF_PMD_INFO("Test NLB");
+		return nlb_afu_test(dev);
+	}
+
+	if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
+		AFU_MF_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
+		return dma_afu_test(dev);
+	}
+
+	AFU_MF_PMD_ERR("Please configure AFU before test");
+	return -EINVAL;
+}
+
+/*
+ * Driver close hook: release the NLB and DMA contexts, then free the
+ * private data itself. The results of the two release calls are ignored.
+ * Returns -EINVAL on a NULL dev, otherwise 0.
+ */
+static int n3000_afu_close(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	(void)nlb_afu_ctx_release(dev);
+	(void)dma_afu_ctx_release(dev);
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+
+	return 0;
+}
+
+/*
+ * Driver dump hook: print the addresses held by the currently configured
+ * context (NLB, or the DMA context selected by dma_cfg.index) to f, or to
+ * stdout when f is NULL.
+ * Returns -EINVAL on a NULL dev or when nothing has been configured,
+ * -ENOENT when there is no private data, otherwise 0.
+ */
+static int n3000_afu_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct n3000_afu_priv *priv = NULL;
+	FILE *out = NULL;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+
+	if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
+		struct nlb_afu_ctx *nlb = &priv->nlb_ctx;
+
+		fprintf(out, "addr:\t\t%p\n", (void *)nlb->addr);
+		fprintf(out, "dsm_ptr:\t%p\n", (void *)nlb->dsm_ptr);
+		fprintf(out, "dsm_iova:\t0x%"PRIx64"\n", nlb->dsm_iova);
+		fprintf(out, "src_ptr:\t%p\n", (void *)nlb->src_ptr);
+		fprintf(out, "src_iova:\t0x%"PRIx64"\n", nlb->src_iova);
+		fprintf(out, "dest_ptr:\t%p\n", (void *)nlb->dest_ptr);
+		fprintf(out, "dest_iova:\t0x%"PRIx64"\n", nlb->dest_iova);
+		fprintf(out, "status_ptr:\t%p\n", (void *)nlb->status_ptr);
+		return 0;
+	}
+
+	if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
+		struct dma_afu_ctx *dma = &priv->dma_ctx[priv->dma_cfg.index];
+
+		fprintf(out, "index:\t\t%d\n", dma->index);
+		fprintf(out, "addr:\t\t%p\n", (void *)dma->addr);
+		fprintf(out, "csr_addr:\t%p\n", (void *)dma->csr_addr);
+		fprintf(out, "desc_addr:\t%p\n", (void *)dma->desc_addr);
+		fprintf(out, "ase_ctrl_addr:\t%p\n", (void *)dma->ase_ctrl_addr);
+		fprintf(out, "ase_data_addr:\t%p\n", (void *)dma->ase_data_addr);
+		fprintf(out, "desc_buf:\t%p\n", (void *)dma->desc_buf);
+		fprintf(out, "magic_buf:\t%p\n", (void *)dma->magic_buf);
+		fprintf(out, "magic_iova:\t0x%"PRIx64"\n", dma->magic_iova);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Driver reset hook: pulse the soft-reset bit of the port control register,
+ * holding it set for 100 microseconds.
+ * Returns -ENOENT when the port address cannot be resolved, otherwise 0.
+ */
+static int n3000_afu_reset(struct afu_mf_rawdev *dev)
+{
+	uint8_t *port = (uint8_t *)n3000_afu_get_port_addr(dev);
+	uint8_t *ctrl_reg = NULL;
+	uint64_t ctrl = 0;
+
+	if (port == NULL)
+		return -ENOENT;
+
+	ctrl_reg = port + PORT_CTRL_REG;
+	ctrl = rte_read64(ctrl_reg);
+
+	/* assert reset, wait, then de-assert with all other bits preserved */
+	rte_write64(ctrl | PORT_SOFT_RESET, ctrl_reg);
+	rte_delay_us(100);
+	rte_write64(ctrl & ~PORT_SOFT_RESET, ctrl_reg);
+
+	return 0;
+}
+
+/*
+ * Operation table of the N3000 AFU driver. No start/stop hooks are
+ * provided; those entries are left NULL.
+ */
+static struct afu_mf_ops n3000_afu_ops = {
+	.init = n3000_afu_init,
+	.config = n3000_afu_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = n3000_afu_test,
+	.close = n3000_afu_close,
+	.dump = n3000_afu_dump,
+	.reset = n3000_afu_reset
+};
+
+/*
+ * Driver descriptor: pairs the N3000 AFU UUID (low word first) with the
+ * operation table above.
+ */
+struct afu_mf_drv n3000_afu_drv = {
+	.uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+	.ops = &n3000_afu_ops
+};
diff --git a/drivers/raw/afu_mf/n3000_afu.h b/drivers/raw/afu_mf/n3000_afu.h
new file mode 100644
index 0000000..4c740da
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _N3000_AFU_H_
+#define _N3000_AFU_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define N3000_AFU_UUID_L  0xc000c9660d824272
+#define N3000_AFU_UUID_H  0x9aeffe5f84570612
+#define N3000_NLB0_UUID_L 0xf89e433683f9040b
+#define N3000_NLB0_UUID_H 0xd8424dc4a4a3c413
+#define N3000_DMA_UUID_L  0xa9149a35bace01ea
+#define N3000_DMA_UUID_H  0xef82def7f6ec40fc
+
+extern struct afu_mf_drv n3000_afu_drv;
+
+#define NUM_N3000_DMA  4
+#define MAX_MSIX_VEC   7
+
+/* N3000 DFL definition */
+#define DFH_UUID_L_OFFSET  8
+#define DFH_UUID_H_OFFSET  16
+/* field extractors for a 64-bit feature header value */
+#define DFH_TYPE(hdr)  (((hdr) >> 60) & 0xf)
+#define DFH_TYPE_AFU  1
+#define DFH_TYPE_BBB  2
+#define DFH_TYPE_PRIVATE  3
+#define DFH_EOL(hdr)  (((hdr) >> 40) & 0x1)
+#define DFH_NEXT_OFFSET(hdr)  (((hdr) >> 16) & 0xffffff)
+#define DFH_FEATURE_ID(hdr)  ((hdr) & 0xfff)
+/* field extractors for a 64-bit port attribute value */
+#define PORT_ATTR_REG(n)  (((n) << 3) + 0x38)
+#define PORT_IMPLEMENTED(attr)  (((attr) >> 60) & 0x1)
+#define PORT_BAR(attr)  (((attr) >> 32) & 0x7)
+#define PORT_OFFSET(attr)  ((attr) & 0xffffff)
+#define PORT_FEATURE_UINT_ID  0x12
+#define PORT_UINT_CAP_REG  0x8
+/*
+ * Port user interrupt capability register: bits [23:12] hold the first
+ * vector and bits [11:0] the number of vectors. The count must not reuse
+ * the shift of the start field, otherwise both macros return the same value.
+ */
+#define PORT_VEC_START(cap)  (((cap) >> 12) & 0xfff)
+#define PORT_VEC_COUNT(cap)  ((cap) & 0xfff)
+#define PORT_CTRL_REG  0x38
+#define PORT_SOFT_RESET  (0x1 << 0)
+
+/* NLB registers definition */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+#define DSM_SIZE           0x200000
+#define DSM_STATUS         0x40
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+#define NLB_BUF_SIZE  0x400000
+#define TEST_MEM_ALIGN  1024
+
+/*
+ * Bit-field view of a 32-bit NLB control word (presumably the CSR_CTL
+ * register - confirm against the users in n3000_afu.c). The three named
+ * bits plus the reserved field add up to 32 bits, the size of csr.
+ */
+struct nlb_csr_ctl {
+	union {
+		uint32_t csr;  /* the whole word */
+		struct {
+			uint32_t reset:1;
+			uint32_t start:1;
+			uint32_t force_completion:1;
+			uint32_t reserved:29;
+		};
+	};
+};
+
+/*
+ * Bit-field view of a 32-bit NLB configuration word (presumably the
+ * CSR_CFG register - confirm against the users in n3000_afu.c). All
+ * fields together, reserved ones included, add up to 32 bits.
+ */
+struct nlb_csr_cfg {
+	union {
+		uint32_t csr;  /* the whole word */
+		struct {
+			uint32_t wrthru_en:1;
+			uint32_t cont:1;
+			uint32_t mode:3;
+			uint32_t multicl_len:2;
+			uint32_t rsvd1:1;
+			uint32_t delay_en:1;
+			uint32_t rdsel:2;
+			uint32_t rsvd2:1;
+			uint32_t chsel:3;
+			uint32_t rsvd3:1;
+			uint32_t wrpush_i:1;
+			uint32_t wr_chsel:3;
+			uint32_t rsvd4:3;
+			uint32_t test_cfg:5;
+			uint32_t interrupt_on_error:1;
+			uint32_t interrupt_testmode:1;
+			uint32_t wrfence_chsel:2;
+		};
+	};
+};
+
+struct nlb_status0 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_writes;
+			uint32_t num_reads;
+		};
+	};
+};
+
+struct nlb_status1 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_pend_writes;
+			uint32_t num_pend_reads;
+		};
+	};
+};
+
+/*
+ * Test result record; nlb_afu_ctx keeps a pointer to one in status_ptr.
+ * NOTE(review): presumably located at offset DSM_STATUS inside the DSM
+ * buffer - confirm against n3000_afu.c.
+ */
+struct nlb_dsm_status {
+	uint32_t test_complete;
+	uint32_t test_error;
+	uint64_t num_clocks;
+	uint32_t num_reads;
+	uint32_t num_writes;
+	uint32_t start_overhead;
+	uint32_t end_overhead;
+};
+
+/* DMA registers definition */
+#define DMA_CSR       0x40
+#define DMA_DESC      0x60
+#define DMA_ASE_CTRL  0x200
+#define DMA_ASE_DATA  0x1000
+
+#define DMA_ASE_WINDOW       4096
+#define DMA_ASE_WINDOW_MASK  ((uint64_t)(DMA_ASE_WINDOW - 1))
+#define INVALID_ASE_PAGE     0xffffffffffffffffULL
+
+#define DMA_WF_MAGIC             0x5772745F53796E63ULL
+#define DMA_WF_MAGIC_ROM         0x1000000000000
+#define DMA_HOST_ADDR(addr)      ((addr) | 0x2000000000000)
+#define DMA_WF_HOST_ADDR(addr)   ((addr) | 0x3000000000000)
+
+#define NUM_DMA_BUF   8
+#define HALF_DMA_BUF  (NUM_DMA_BUF / 2)
+
+#define DMA_MASK_32_BIT 0xFFFFFFFF
+
+#define DMA_CSR_BUSY           0x1
+#define DMA_DESC_BUFFER_EMPTY  0x2
+#define DMA_DESC_BUFFER_FULL   0x4
+
+#define DWORD_BYTES 4
+#define IS_ALIGNED_DWORD(addr) (((addr) % DWORD_BYTES) == 0)
+
+#define QWORD_BYTES 8
+#define IS_ALIGNED_QWORD(addr) (((addr) % QWORD_BYTES) == 0)
+
+#define DMA_ALIGN_BYTES 64
+#define IS_DMA_ALIGNED(addr) (((addr) % DMA_ALIGN_BYTES) == 0)
+
+#define CCIP_ALIGN_BYTES (DMA_ALIGN_BYTES << 2)
+
+#define DMA_TIMEOUT_MSEC  5000
+
+#define MAGIC_BUF_SIZE  64
+#define ERR_CHECK_LIMIT  64
+
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+typedef enum {
+	HOST_TO_FPGA = 0,
+	FPGA_TO_HOST,
+	FPGA_TO_FPGA,
+	FPGA_MAX_TRANSFER_TYPE,
+} fpga_dma_type;
+
+/*
+ * 32-bit control word of a DMA descriptor; it is the last member of
+ * msgdma_ext_desc. The bit-fields add up to exactly 32 bits.
+ */
+typedef union {
+	uint32_t csr;  /* the whole word */
+	struct {
+		uint32_t tx_channel:8;
+		uint32_t generate_sop:1;
+		uint32_t generate_eop:1;
+		uint32_t park_reads:1;
+		uint32_t park_writes:1;
+		uint32_t end_on_eop:1;
+		uint32_t reserved_1:1;
+		uint32_t transfer_irq_en:1;
+		uint32_t early_term_irq_en:1;
+		uint32_t trans_error_irq_en:8;
+		uint32_t early_done_en:1;
+		uint32_t reserved_2:6;
+		uint32_t go:1;
+	};
+} msgdma_desc_ctrl;
+
+/*
+ * DMA descriptor, packed so that its members occupy 32 contiguous bytes
+ * (3 x 4 + 2 + 1 + 1 + 2 + 2 + 3 x 4). The read and write addresses are
+ * split into a 32-bit low part and a 32-bit *_ext part.
+ */
+typedef struct __rte_packed {
+	uint32_t rd_address;
+	uint32_t wr_address;
+	uint32_t len;
+	uint16_t seq_num;
+	uint8_t rd_burst_count;
+	uint8_t wr_burst_count;
+	uint16_t rd_stride;
+	uint16_t wr_stride;
+	uint32_t rd_address_ext;
+	uint32_t wr_address_ext;
+	msgdma_desc_ctrl control;
+} msgdma_ext_desc;
+
+/*
+ * 32-bit DMA status word. The first three bits line up with the
+ * DMA_CSR_BUSY (0x1), DMA_DESC_BUFFER_EMPTY (0x2) and
+ * DMA_DESC_BUFFER_FULL (0x4) masks defined in this header, assuming the
+ * compiler allocates bit-fields from the least significant bit.
+ */
+typedef union {
+	uint32_t csr;  /* the whole word */
+	struct {
+		uint32_t busy:1;
+		uint32_t desc_buf_empty:1;
+		uint32_t desc_buf_full:1;
+		uint32_t rsp_buf_empty:1;
+		uint32_t rsp_buf_full:1;
+		uint32_t stopped:1;
+		uint32_t resetting:1;
+		uint32_t stopped_on_error:1;
+		uint32_t stopped_on_early_term:1;
+		uint32_t irq:1;
+		uint32_t reserved:22;
+	};
+} msgdma_status;
+
+/*
+ * 32-bit DMA control word. Six single-bit controls are followed by a
+ * reserved field sized so that the bit-fields cover the whole csr word
+ * (6 + 26 = 32), like the other register unions in this header.
+ */
+typedef union {
+	uint32_t csr;  /* the whole word */
+	struct {
+		uint32_t stop_dispatcher:1;
+		uint32_t reset_dispatcher:1;
+		uint32_t stop_on_error:1;
+		uint32_t stopped_on_early_term:1;
+		uint32_t global_intr_en_mask:1;
+		uint32_t stop_descriptors:1;
+		uint32_t reserved:26;
+	};
+} msgdma_ctrl;
+
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rd_fill_level:16;
+		uint32_t wr_fill_level:16;
+	};
+} msgdma_fill_level;
+
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rsp_fill_level:16;
+		uint32_t reserved:16;
+	};
+} msgdma_rsp_level;
+
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rd_seq_num:16;
+		uint32_t wr_seq_num:16;
+	};
+} msgdma_seq_num;
+
+/*
+ * Layout of the DMA control/status block: five consecutive 32-bit words,
+ * packed (20 bytes in total).
+ */
+typedef struct __rte_packed {
+	msgdma_status status;
+	msgdma_ctrl ctrl;
+	msgdma_fill_level fill_level;
+	msgdma_rsp_level rsp;
+	msgdma_seq_num seq_num;
+} msgdma_csr;
+
+/* Address of the status / control word inside a block that starts at csr. */
+#define CSR_STATUS(csr)   (&(((msgdma_csr *)(csr))->status))
+#define CSR_CONTROL(csr)  (&(((msgdma_csr *)(csr))->ctrl))
+
+/*
+ * Runtime state of the NLB function; one instance lives in
+ * n3000_afu_priv. Each buffer is tracked by a CPU pointer (*_ptr) and a
+ * 64-bit *_iova value.
+ */
+struct nlb_afu_ctx {
+	uint8_t *addr;
+	uint8_t *dsm_ptr;
+	uint64_t dsm_iova;
+	uint8_t *src_ptr;
+	uint64_t src_iova;
+	uint8_t *dest_ptr;
+	uint64_t dest_iova;
+	struct nlb_dsm_status *status_ptr;
+};
+
+/*
+ * Runtime state of one DMA controller; n3000_afu_priv holds an array of
+ * NUM_N3000_DMA of these. dma_buf[] and dma_iova[] are parallel arrays
+ * of NUM_DMA_BUF entries.
+ */
+struct dma_afu_ctx {
+	int index;
+	uint8_t *addr;
+	uint8_t *csr_addr;
+	uint8_t *desc_addr;
+	uint8_t *ase_ctrl_addr;
+	uint8_t *ase_data_addr;
+	uint64_t mem_size;
+	uint64_t cur_ase_page;
+	int event_fd;
+	int verbose;
+	int pattern;
+	void *data_buf;
+	void *ref_buf;
+	msgdma_ext_desc *desc_buf;
+	uint64_t *magic_buf;
+	uint64_t magic_iova;
+	uint32_t dma_buf_size;
+	uint64_t *dma_buf[NUM_DMA_BUF];
+	uint64_t dma_iova[NUM_DMA_BUF];
+};
+
+/*
+ * Private data of the N3000 AFU driver: the last NLB and DMA
+ * configurations plus the runtime context of the NLB function and of
+ * each DMA controller.
+ */
+struct n3000_afu_priv {
+	struct rte_pmd_afu_nlb_cfg nlb_cfg;
+	struct rte_pmd_afu_dma_cfg dma_cfg;
+	struct nlb_afu_ctx nlb_ctx;
+	struct dma_afu_ctx dma_ctx[NUM_N3000_DMA];
+	int num_dma;
+	int cfg_type;  /* compared with RTE_PMD_AFU_N3000_DMA by the dump handler */
+};
+
+#endif /* _N3000_AFU_H_ */
diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h
new file mode 100644
index 0000000..89d866a
--- /dev/null
+++ b/drivers/raw/afu_mf/rte_pmd_afu.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __RTE_PMD_AFU_H__
+#define __RTE_PMD_AFU_H__
+
+/**
+ * @file rte_pmd_afu.h
+ *
+ * AFU PMD specific definitions.
+ *
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define RTE_PMD_AFU_N3000_NLB   1
+#define RTE_PMD_AFU_N3000_DMA   2
+
+#define NLB_MODE_LPBK      0
+#define NLB_MODE_READ      1
+#define NLB_MODE_WRITE     2
+#define NLB_MODE_TRPUT     3
+
+#define NLB_VC_AUTO        0
+#define NLB_VC_VL0         1
+#define NLB_VC_VH0         2
+#define NLB_VC_VH1         3
+#define NLB_VC_RANDOM      4
+
+#define NLB_WRLINE_M       0
+#define NLB_WRLINE_I       1
+#define NLB_WRPUSH_I       2
+
+#define NLB_RDLINE_S       0
+#define NLB_RDLINE_I       1
+#define NLB_RDLINE_MIXED   2
+
+#define MIN_CACHE_LINES   1
+#define MAX_CACHE_LINES   1024
+
+#define MIN_DMA_BUF_SIZE  64
+#define MAX_DMA_BUF_SIZE  (1023 * 1024)
+
+/**
+ * NLB AFU configuration data structure.
+ */
+struct rte_pmd_afu_nlb_cfg {
+	uint32_t mode;
+	uint32_t begin;
+	uint32_t end;
+	uint32_t multi_cl;
+	uint32_t cont;
+	uint32_t timeout;
+	uint32_t cache_policy;
+	uint32_t cache_hint;
+	uint32_t read_vc;
+	uint32_t write_vc;
+	uint32_t wrfence_vc;
+	uint32_t freq_mhz;
+};
+
+/**
+ * DMA AFU configuration data structure.
+ */
+struct rte_pmd_afu_dma_cfg {
+	uint32_t index;     /* index of DMA controller */
+	uint32_t length;    /* total length of data to DMA */
+	uint32_t offset;    /* address offset of target memory */
+	uint32_t size;      /* size of transfer buffer */
+	uint32_t pattern;   /* data pattern to fill in test buffer */
+	uint32_t unaligned; /* use unaligned address or length in sweep test */
+	uint32_t verbose;   /* enable verbose error information in test */
+};
+
+/**
+ * N3000 AFU configuration data structure.
+ */
+struct rte_pmd_afu_n3000_cfg {
+	int type;   /* RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA */
+	union {
+		struct rte_pmd_afu_nlb_cfg nlb_cfg;
+		struct rte_pmd_afu_dma_cfg dma_cfg;
+	};
+};
+
+/**
+ * HE-LBK & HE-MEM-LBK AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_lbk_cfg {
+	uint32_t mode;
+	uint32_t begin;
+	uint32_t end;
+	uint32_t multi_cl;
+	uint32_t cont;
+	uint32_t timeout;
+	uint32_t trput_interleave;
+	uint32_t freq_mhz;
+};
+
+/**
+ * HE-MEM-TG AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_mem_tg_cfg {
+	uint32_t channel_mask;   /* mask of traffic generator channel */
+};
+
+/**
+ * HE-HSSI AFU configuration data structure.
+ *
+ * The field notes below describe how the HE-HSSI test routine consumes
+ * the values.
+ */
+struct rte_pmd_afu_he_hssi_cfg {
+	uint32_t port;           /* written to the traffic channel select register */
+	uint32_t timeout;
+	uint32_t num_packets;    /* written to TG_NUM_PKT */
+	uint32_t random_length;
+	uint32_t packet_length;  /* written to TG_PKT_LEN */
+	uint32_t random_payload;
+	uint32_t rnd_seed[3];
+	uint64_t src_addr;       /* bits 31:0 go to TG_SRC_MAC_L, bits 47:32 to TG_SRC_MAC_H */
+	uint64_t dest_addr;
+	int he_loopback;         /* >= 0: only set LOOPBACK_EN (non-zero enables); < 0: run traffic test */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PMD_AFU_H__ */
diff --git a/drivers/raw/afu_mf/version.map b/drivers/raw/afu_mf/version.map
new file mode 100644
index 0000000..c2e0723
--- /dev/null
+++ b/drivers/raw/afu_mf/version.map
@@ -0,0 +1,3 @@
+DPDK_22 {
+	local: *;
+};
diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build
index 05e7de1..c3627f7 100644
--- a/drivers/raw/meson.build
+++ b/drivers/raw/meson.build
@@ -6,6 +6,7 @@ if is_windows
 endif
 
 drivers = [
+        'afu_mf',
         'cnxk_bphy',
         'cnxk_gpio',
         'dpaa2_cmdif',
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v4] raw/afu_mf: introduce AFU MF device driver
  2022-05-19  5:52     ` [PATCH v4] " Wei Huang
@ 2022-05-26  6:51       ` Xu, Rosen
  2022-05-27  5:36       ` [PATCH v5 0/5] introduce afu_mf raw " Wei Huang
  1 sibling, 0 replies; 57+ messages in thread
From: Xu, Rosen @ 2022-05-26  6:51 UTC (permalink / raw)
  To: Huang, Wei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Zhang, Tianfei, Zhang, Qi Z

Hi Wei,

This is a huge patch; can you split it into several patches?

Thanks,
Rosen

> -----Original Message-----
> From: Huang, Wei <wei.huang@intel.com>
> Sent: Thursday, May 19, 2022 13:53
> To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang, Wei
> <wei.huang@intel.com>
> Subject: [PATCH v4] raw/afu_mf: introduce AFU MF device driver
> 
> Add afu_mf driver to manage various AFU (Acceleration Function Unit)
> in FPGA.
> 
> Signed-off-by: Wei Huang <wei.huang@intel.com>
> Acked-by: Tianfei Zhang <tianfei.zhang@intel.com>
> ---
> v2: fix typo
> ---
> v3: fix build error in FreeBSD13-64, UB2004-32 and UB2204-32
> ---
> v4: fix coding style issue and build error in FreeBSD13-64
> ---
>  drivers/raw/afu_mf/afu_mf_rawdev.c |  440 ++++++++
>  drivers/raw/afu_mf/afu_mf_rawdev.h |   89 ++
>  drivers/raw/afu_mf/he_hssi.c       |  369 +++++++
>  drivers/raw/afu_mf/he_hssi.h       |  102 ++
>  drivers/raw/afu_mf/he_lbk.c        |  427 ++++++++
>  drivers/raw/afu_mf/he_lbk.h        |  121 +++
>  drivers/raw/afu_mf/he_mem.c        |  181 ++++
>  drivers/raw/afu_mf/he_mem.h        |   40 +
>  drivers/raw/afu_mf/meson.build     |    8 +
>  drivers/raw/afu_mf/n3000_afu.c     | 2005
> ++++++++++++++++++++++++++++++++++++
>  drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
>  drivers/raw/afu_mf/rte_pmd_afu.h   |  134 +++
>  drivers/raw/afu_mf/version.map     |    3 +
>  drivers/raw/meson.build            |    1 +
>  14 files changed, 4253 insertions(+)
>  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
>  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
>  create mode 100644 drivers/raw/afu_mf/he_hssi.c
>  create mode 100644 drivers/raw/afu_mf/he_hssi.h
>  create mode 100644 drivers/raw/afu_mf/he_lbk.c
>  create mode 100644 drivers/raw/afu_mf/he_lbk.h
>  create mode 100644 drivers/raw/afu_mf/he_mem.c
>  create mode 100644 drivers/raw/afu_mf/he_mem.h
>  create mode 100644 drivers/raw/afu_mf/meson.build
>  create mode 100644 drivers/raw/afu_mf/n3000_afu.c
>  create mode 100644 drivers/raw/afu_mf/n3000_afu.h
>  create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h
>  create mode 100644 drivers/raw/afu_mf/version.map
> 
> diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c
> b/drivers/raw/afu_mf/afu_mf_rawdev.c
> new file mode 100644
> index 0000000..f24c748
> --- /dev/null
> +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
> @@ -0,0 +1,440 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2022 Intel Corporation
> + */
> +
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <sys/eventfd.h>
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memzone.h>
> +#include <rte_rawdev_pmd.h>
> +
> +#include "rte_pmd_afu.h"
> +#include "afu_mf_rawdev.h"
> +#include "n3000_afu.h"
> +#include "he_lbk.h"
> +#include "he_mem.h"
> +#include "he_hssi.h"
> +
> +#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
> +
> +/*
> + * UUIDs of the AFUs this driver registers for (used as the id_table of
> + * the AFU driver). The { 0, 0 } entry terminates the table.
> + */
> +static const struct rte_afu_uuid afu_uuid_map[] = {
> +	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
> +	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
> +	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
> +	{ HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
> +	{ HE_HSSI_UUID_L, HE_HSSI_UUID_H },
> +	{ 0, 0 /* sentinel */ }
> +};
> +
> +/*
> + * NULL-terminated list of the per-AFU drivers; afu_mf_ops_get() walks it
> + * to find the operation table matching a UUID.
> + */
> +static struct afu_mf_drv *afu_table[] = {
> +	&n3000_afu_drv,
> +	&he_lbk_drv,
> +	&he_mem_lbk_drv,
> +	&he_mem_tg_drv,
> +	&he_hssi_drv,
> +	NULL
> +};
> +
> +/*
> + * Try to take the per-AFU lock kept in the shared area.
> + *
> + * Returns 0 when the lock was acquired, -ENODEV when dev or its shared
> + * area is missing and -EBUSY when the lock is already held.
> + */
> +static inline int afu_mf_trylock(struct afu_mf_rawdev *dev)
> +{
> +	int32_t expected = 0;
> +
> +	if (!dev || !dev->shared)
> +		return -ENODEV;
> +
> +	/*
> +	 * Use the strong compare-and-swap: there is no retry loop, so a
> +	 * spurious failure of the weak form would be reported as -EBUSY
> +	 * although the lock is free.
> +	 */
> +	if (!__atomic_compare_exchange_n(&dev->shared->lock, &expected, 1,
> +			0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
> +		return -EBUSY;
> +
> +	return 0;
> +}
> +
> +/* Release the per-AFU lock; a missing device or shared area is ignored. */
> +static inline void afu_mf_unlock(struct afu_mf_rawdev *dev)
> +{
> +	if (dev != NULL && dev->shared != NULL)
> +		__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE);
> +}
> +
> +/*
> + * rawdev configure hook: hand the configuration blob to the AFU driver.
> + * Returns -ENODEV without private data, 0 when the driver has no config
> + * handler, otherwise the handler's result.
> + */
> +static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
> +	rte_rawdev_obj_t config, size_t config_size)
> +{
> +	struct afu_mf_rawdev *dev = NULL;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (dev == NULL)
> +		return -ENODEV;
> +
> +	if (dev->ops == NULL || dev->ops->config == NULL)
> +		return 0;
> +
> +	return dev->ops->config(dev, config, config_size);
> +}
> +
> +/*
> + * rawdev start hook: run the AFU driver's start handler, if any, while
> + * holding the per-AFU lock. A failed trylock is returned as is.
> + */
> +static int afu_mf_rawdev_start(struct rte_rawdev *rawdev)
> +{
> +	struct afu_mf_rawdev *dev = NULL;
> +	int rc = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (dev == NULL)
> +		return -ENODEV;
> +
> +	rc = afu_mf_trylock(dev);
> +	if (rc != 0) {
> +		AFU_MF_PMD_WARN("AFU is busy, please start it later");
> +		return rc;
> +	}
> +
> +	/* rc is 0 here and stays 0 when there is no handler */
> +	if (dev->ops != NULL && dev->ops->start != NULL)
> +		rc = dev->ops->start(dev);
> +
> +	afu_mf_unlock(dev);
> +
> +	return rc;
> +}
> +
> +/*
> + * rawdev stop hook: run the AFU driver's stop handler, if any, while
> + * holding the per-AFU lock. The handler's result is not reported since
> + * the hook returns void.
> + */
> +static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev)
> +{
> +	struct afu_mf_rawdev *dev = NULL;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (dev == NULL)
> +		return;
> +
> +	if (afu_mf_trylock(dev) != 0) {
> +		AFU_MF_PMD_WARN("AFU is busy, please stop it later");
> +		return;
> +	}
> +
> +	if (dev->ops != NULL && dev->ops->stop != NULL)
> +		(void)dev->ops->stop(dev);
> +
> +	afu_mf_unlock(dev);
> +}
> +
> +/*
> + * rawdev close hook: forward to the AFU driver's close handler.
> + * Returns -ENODEV without private data, 0 when no handler is set.
> + */
> +static int afu_mf_rawdev_close(struct rte_rawdev *rawdev)
> +{
> +	struct afu_mf_rawdev *dev = NULL;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (dev == NULL)
> +		return -ENODEV;
> +
> +	if (dev->ops == NULL || dev->ops->close == NULL)
> +		return 0;
> +
> +	return dev->ops->close(dev);
> +}
> +
> +/*
> + * rawdev reset hook: run the AFU driver's reset handler, if any, while
> + * holding the per-AFU lock. A failed trylock is returned as is.
> + */
> +static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev)
> +{
> +	struct afu_mf_rawdev *dev = NULL;
> +	int rc = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (dev == NULL)
> +		return -ENODEV;
> +
> +	rc = afu_mf_trylock(dev);
> +	if (rc != 0) {
> +		AFU_MF_PMD_WARN("AFU is busy, please reset it later");
> +		return rc;
> +	}
> +
> +	/* rc is 0 here and stays 0 when there is no handler */
> +	if (dev->ops != NULL && dev->ops->reset != NULL)
> +		rc = dev->ops->reset(dev);
> +
> +	afu_mf_unlock(dev);
> +
> +	return rc;
> +}
> +
> +/*
> + * rawdev selftest hook: run the AFU driver's test handler, if any, while
> + * holding the per-AFU lock. Returns -ENODEV for an invalid device id and
> + * -ENOENT when the raw device has no private data.
> + */
> +static int afu_mf_rawdev_selftest(uint16_t dev_id)
> +{
> +	struct afu_mf_rawdev *dev = NULL;
> +	int rc = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	if (!rte_rawdev_pmd_is_valid_dev(dev_id))
> +		return -ENODEV;
> +
> +	dev = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
> +	if (dev == NULL)
> +		return -ENOENT;
> +
> +	rc = afu_mf_trylock(dev);
> +	if (rc != 0) {
> +		AFU_MF_PMD_WARN("AFU is busy, please test it later");
> +		return rc;
> +	}
> +
> +	/* rc is 0 here and stays 0 when there is no handler */
> +	if (dev->ops != NULL && dev->ops->test != NULL)
> +		rc = dev->ops->test(dev);
> +
> +	afu_mf_unlock(dev);
> +
> +	return rc;
> +}
> +
> +/*
> + * rawdev dump hook: forward to the AFU driver's dump handler with the
> + * output stream f. Returns -ENODEV without private data, 0 when no
> + * handler is set.
> + */
> +static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f)
> +{
> +	struct afu_mf_rawdev *dev = NULL;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (dev == NULL)
> +		return -ENODEV;
> +
> +	if (dev->ops == NULL || dev->ops->dump == NULL)
> +		return 0;
> +
> +	return dev->ops->dump(dev, f);
> +}
> +
> +/*
> + * rawdev operations implemented by this driver: configure, start, stop,
> + * close, reset, dump and selftest. Every other hook is left NULL.
> + */
> +static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
> +	.dev_info_get = NULL,
> +	.dev_configure = afu_mf_rawdev_configure,
> +	.dev_start = afu_mf_rawdev_start,
> +	.dev_stop = afu_mf_rawdev_stop,
> +	.dev_close = afu_mf_rawdev_close,
> +	.dev_reset = afu_mf_rawdev_reset,
> +
> +	.queue_def_conf = NULL,
> +	.queue_setup = NULL,
> +	.queue_release = NULL,
> +	.queue_count = NULL,
> +
> +	.attr_get = NULL,
> +	.attr_set = NULL,
> +
> +	.enqueue_bufs = NULL,
> +	.dequeue_bufs = NULL,
> +
> +	.dump = afu_mf_rawdev_dump,
> +
> +	.xstats_get = NULL,
> +	.xstats_get_names = NULL,
> +	.xstats_get_by_name = NULL,
> +	.xstats_reset = NULL,
> +
> +	.firmware_status_get = NULL,
> +	.firmware_version_get = NULL,
> +	.firmware_load = NULL,
> +	.firmware_unload = NULL,
> +
> +	.dev_selftest = afu_mf_rawdev_selftest,
> +};
> +
> +/*
> + * Look up, or reserve on first use, the memzone that carries the shared
> + * state of one AFU and return its address through data. The lock is
> + * cleared only when the zone has just been reserved.
> + *
> + * Returns 0 on success, -EINVAL on a NULL argument, -ENOMEM when the
> + * zone can neither be found nor reserved.
> + */
> +static int
> +afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
> +	int socket_id)
> +{
> +	char mz_name[RTE_MEMZONE_NAMESIZE];
> +	const struct rte_memzone *zone = NULL;
> +	struct afu_mf_shared *shared = NULL;
> +	int fresh = 0;
> +
> +	if (name == NULL || data == NULL)
> +		return -EINVAL;
> +
> +	/* name format is afu_?|??:??.? which is unique */
> +	snprintf(mz_name, sizeof(mz_name), "%s", name);
> +
> +	zone = rte_memzone_lookup(mz_name);
> +	if (zone == NULL) {
> +		zone = rte_memzone_reserve(mz_name,
> +				sizeof(struct afu_mf_shared),
> +				socket_id, 0);
> +		fresh = 1;
> +	}
> +
> +	if (zone == NULL) {
> +		AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
> +			mz_name);
> +		return -ENOMEM;
> +	}
> +
> +	shared = (struct afu_mf_shared *)zone->addr;
> +	if (fresh)
> +		shared->lock = 0;  /* initialize memory zone on the first time */
> +
> +	*data = shared;
> +
> +	return 0;
> +}
> +
> +/*
> + * Build the raw device name "afu_<AFU device name>" into name.
> + *
> + * Returns 0 on success, -EINVAL on a bad argument and -ENAMETOOLONG
> + * when the result does not fit into size bytes.
> + */
> +static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev,
> +	char *name, size_t size)
> +{
> +	int len = 0;
> +
> +	if (afu_dev == NULL || name == NULL || size == 0)
> +		return -EINVAL;
> +
> +	len = snprintf(name, size, "afu_%s", afu_dev->device.name);
> +	if (len < (int)size)
> +		return 0;
> +
> +	AFU_MF_PMD_ERR("Name of AFU device is too long!");
> +	return -ENAMETOOLONG;
> +}
> +
> +/*
> + * Find the operation table of the AFU driver whose UUID equals afu_id.
> + * Returns NULL when afu_id is NULL or no entry of afu_table matches.
> + */
> +static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id)
> +{
> +	int idx = 0;
> +
> +	if (afu_id == NULL)
> +		return NULL;
> +
> +	for (idx = 0; afu_table[idx] != NULL; idx++) {
> +		struct afu_mf_drv *drv = afu_table[idx];
> +
> +		if (drv->uuid.uuid_low != afu_id->uuid_low)
> +			continue;
> +		if (drv->uuid.uuid_high != afu_id->uuid_high)
> +			continue;
> +
> +		return drv->ops;
> +	}
> +
> +	return NULL;
> +}
> +
> +/*
> + * Create the raw device that represents one AFU: allocate it, bind the
> + * operation table selected by the AFU UUID, run the driver's init
> + * handler and attach the shared lock area.
> + *
> + * Returns 0 on success or a negative errno value. Every failure after
> + * the allocation releases the raw device again and reports an error:
> + * -ENODEV when the private data is missing, -ENOENT when no driver
> + * matches the UUID, otherwise the code of the failing step.
> + */
> +static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int socket_id)
> +{
> +	struct rte_rawdev *rawdev = NULL;
> +	struct afu_mf_rawdev *dev = NULL;
> +	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
> +	int ret = 0;
> +
> +	if (!afu_dev)
> +		return -EINVAL;
> +
> +	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
> +	if (ret)
> +		return ret;
> +
> +	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
> +		name, socket_id);
> +
> +	/* Allocate device structure */
> +	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct afu_mf_rawdev),
> +				socket_id);
> +	if (!rawdev) {
> +		AFU_MF_PMD_ERR("Unable to allocate raw device");
> +		return -ENOMEM;
> +	}
> +
> +	rawdev->dev_ops = &afu_mf_rawdev_ops;
> +	rawdev->device = &afu_dev->device;
> +	rawdev->driver_name = afu_dev->driver->driver.name;
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (!dev) {
> +		/* ret is still 0 here; do not report success on this path */
> +		ret = -ENODEV;
> +		goto cleanup;
> +	}
> +
> +	dev->rawdev = rawdev;
> +	dev->port = afu_dev->id.port;
> +	dev->addr = afu_dev->mem_resource[0].addr;
> +	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
> +	if (dev->ops == NULL) {
> +		AFU_MF_PMD_ERR("Unsupported AFU device");
> +		/* ret is still 0 here; do not report success on this path */
> +		ret = -ENOENT;
> +		goto cleanup;
> +	}
> +
> +	if (dev->ops->init) {
> +		ret = (*dev->ops->init)(dev);
> +		if (ret) {
> +			AFU_MF_PMD_ERR("Failed to init %s", name);
> +			goto cleanup;
> +		}
> +	}
> +
> +	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
> +	if (ret)
> +		goto cleanup;
> +
> +	return ret;
> +
> +cleanup:
> +	rte_rawdev_pmd_release(rawdev);
> +	return ret;
> +}
> +
> +/*
> + * Release the raw device that was created for afu_dev.
> + *
> + * Returns -EINVAL when afu_dev is NULL or no raw device carries the
> + * derived name, the name helper's error when the name cannot be built,
> + * and 0 otherwise - a failed release is only logged.
> + */
> +static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev)
> +{
> +	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
> +	struct rte_rawdev *rawdev = NULL;
> +	int rc = 0;
> +
> +	if (afu_dev == NULL)
> +		return -EINVAL;
> +
> +	rc = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
> +	if (rc != 0)
> +		return rc;
> +
> +	AFU_MF_PMD_INFO("Destroy raw device %s", name);
> +
> +	rawdev = rte_rawdev_pmd_get_named_dev(name);
> +	if (rawdev == NULL) {
> +		AFU_MF_PMD_ERR("Raw device %s not found", name);
> +		return -EINVAL;
> +	}
> +
> +	/* rte_rawdev_close is called by pmd_release */
> +	if (rte_rawdev_pmd_release(rawdev) != 0)
> +		AFU_MF_PMD_DEBUG("Device cleanup failed");
> +
> +	return 0;
> +}
> +
> +/* AFU bus probe callback: create the raw device on the caller's socket. */
> +static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev)
> +{
> +	int socket_id = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	socket_id = rte_socket_id();
> +	return afu_mf_rawdev_create(afu_dev, socket_id);
> +}
> +
> +/* AFU bus remove callback: destroy the raw device of afu_dev. */
> +static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev)
> +{
> +	int rc = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	rc = afu_mf_rawdev_destroy(afu_dev);
> +	return rc;
> +}
> +
> +/*
> + * AFU bus driver descriptor: the UUIDs it matches and the probe/remove
> + * callbacks.
> + */
> +static struct rte_afu_driver afu_mf_pmd_drv = {
> +	.id_table = afu_uuid_map,
> +	.probe = afu_mf_rawdev_probe,
> +	.remove = afu_mf_rawdev_remove
> +};
> +
> +RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME,
> afu_mf_pmd_drv);
> +RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);
> diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h
> b/drivers/raw/afu_mf/afu_mf_rawdev.h
> new file mode 100644
> index 0000000..5a66f6c
> --- /dev/null
> +++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
> @@ -0,0 +1,89 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2022 Intel Corporation
> + */
> +
> +#ifndef __AFU_MF_RAWDEV_H__
> +#define __AFU_MF_RAWDEV_H__
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +
> +#include <rte_cycles.h>
> +#include <rte_bus_ifpga.h>
> +#include <rte_rawdev.h>
> +
> +extern int afu_mf_pmd_logtype;
> +
> +#define AFU_MF_PMD_LOG(level, fmt, args...) \
> +	rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n",
> \
> +		__func__, ##args)
> +
> +#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG, ">>")
> +
> +#define AFU_MF_PMD_DEBUG(fmt, args...) \
> +	AFU_MF_PMD_LOG(DEBUG, fmt, ## args)
> +#define AFU_MF_PMD_INFO(fmt, args...) \
> +	AFU_MF_PMD_LOG(INFO, fmt, ## args)
> +#define AFU_MF_PMD_ERR(fmt, args...) \
> +	AFU_MF_PMD_LOG(ERR, fmt, ## args)
> +#define AFU_MF_PMD_WARN(fmt, args...) \
> +	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
> +
> +#define CLS_TO_SIZE(n)  ((n) << 6)  /* get size of n cache lines */
> +#define SIZE_TO_CLS(s)  ((s) >> 6)  /* convert size to number of cache lines
> */
> +#define MHZ(f)  ((f) * 1000000)
> +
> +/*
> + * Poll a memory location until a condition holds or a timeout expires.
> + *
> + * Each iteration copies *addr into val and evaluates cond; when cond is
> + * false it sleeps invl milliseconds (rte_delay_ms) and adds invl to the
> + * elapsed time, which is compared against timeout (same unit as invl).
> + * The statement expression evaluates to 0 when cond holds after the
> + * loop and to 1 otherwise. cond is evaluated once more after the loop,
> + * so it must be free of side effects. An invl of 0 never advances the
> + * elapsed time.
> + */
> +#define dsm_poll_timeout(addr, val, cond, invl, timeout) \
> +({                                                       \
> +	uint64_t __wait = 0;                                 \
> +	uint64_t __invl = (invl);                            \
> +	uint64_t __timeout = (timeout);                      \
> +	for (; __wait <= __timeout; __wait += __invl) {      \
> +		(val) = *(addr);                                 \
> +		if (cond)                                        \
> +			break;                                       \
> +		rte_delay_ms(__invl);                            \
> +	}                                                    \
> +	(cond) ? 0 : 1;                                      \
> +})
> +
> +struct afu_mf_rawdev;
> +
> +/*
> + * Callbacks an AFU driver may provide. Any entry may be NULL; the rawdev
> + * wrappers check a callback for NULL before calling it and report 0 when
> + * it is absent.
> + */
> +struct afu_mf_ops {
> +	int (*init)(struct afu_mf_rawdev *dev);
> +	int (*config)(struct afu_mf_rawdev *dev, void *config,
> +		size_t config_size);
> +	int (*start)(struct afu_mf_rawdev *dev);
> +	int (*stop)(struct afu_mf_rawdev *dev);
> +	int (*test)(struct afu_mf_rawdev *dev);
> +	int (*close)(struct afu_mf_rawdev *dev);
> +	int (*reset)(struct afu_mf_rawdev *dev);
> +	int (*dump)(struct afu_mf_rawdev *dev, FILE *f);
> +};
> +
> +/* Pairs an AFU UUID with the operation table that handles it. */
> +struct afu_mf_drv {
> +	struct rte_afu_uuid uuid;
> +	struct afu_mf_ops *ops;
> +};
> +
> +/*
> + * Per-AFU state placed in a named memzone. lock is 0 when free and 1
> + * when taken; it is only accessed with atomic builtins.
> + */
> +struct afu_mf_shared {
> +	int32_t lock;
> +};
> +
> +struct afu_mf_rawdev {
> +	struct rte_rawdev *rawdev;  /* point to parent raw device */
> +	struct afu_mf_shared *shared;  /* shared data for multi-process */
> +	struct afu_mf_ops *ops;  /* device operation functions */
> +	int port;  /* index of port the AFU attached */
> +	void *addr;  /* base address of AFU registers */
> +	void *priv;  /* private driver data */
> +};
> +
> +/* Return the driver private data of rawdev, or NULL when rawdev is NULL. */
> +static inline struct afu_mf_rawdev *
> +afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev)
> +{
> +	if (rawdev == NULL)
> +		return NULL;
> +
> +	return (struct afu_mf_rawdev *)rawdev->dev_private;
> +}
> +
> +#endif /* __AFU_MF_RAWDEV_H__ */
> diff --git a/drivers/raw/afu_mf/he_hssi.c b/drivers/raw/afu_mf/he_hssi.c
> new file mode 100644
> index 0000000..bedafbd
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_hssi.c
> @@ -0,0 +1,369 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memcpy.h>
> +#include <rte_io.h>
> +#include <rte_vfio.h>
> +#include <rte_bus_pci.h>
> +#include <rte_bus_ifpga.h>
> +#include <rte_rawdev.h>
> +
> +#include "afu_mf_rawdev.h"
> +#include "he_hssi.h"
> +
> +/*
> + * Write value to the traffic controller register addr through the
> + * command/data mailbox.
> + *
> + * The data word is written first, then the write command; the function
> + * polls for the acknowledge bit, then writes the acknowledge back until
> + * it reads as cleared. Each phase polls every MAILBOX_POLL_INTERVAL_MS
> + * for at most MAILBOX_TIMEOUT_MS.
> + *
> + * Returns 0 on success, -EINVAL when ctx is NULL, -ETIMEDOUT when either
> + * phase times out.
> + */
> +static int he_hssi_indirect_write(struct he_hssi_ctx *ctx, uint32_t addr,
> +	uint32_t value)
> +{
> +	struct traffic_ctrl_cmd cmd;
> +	struct traffic_ctrl_data data;
> +	uint32_t i = 0;
> +
> +	AFU_MF_PMD_DEBUG("Indirect write 0x%x, value 0x%08x", addr, value);
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	/*
> +	 * Clear the whole word first, as done for cmd below, so that the
> +	 * bits outside write_data are not uninitialised stack content.
> +	 */
> +	data.csr = 0;
> +	data.write_data = value;
> +	rte_write64(data.csr, ctx->addr + TRAFFIC_CTRL_DATA);
> +
> +	cmd.csr = 0;
> +	cmd.write_cmd = 1;
> +	cmd.afu_cmd_addr = addr;
> +	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
> +
> +	/* wait until the command is acknowledged */
> +	while (i < MAILBOX_TIMEOUT_MS) {
> +		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
> +		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
> +		if (cmd.ack_trans)
> +			break;
> +		i += MAILBOX_POLL_INTERVAL_MS;
> +	}
> +	if (i >= MAILBOX_TIMEOUT_MS)
> +		return -ETIMEDOUT;
> +
> +	/* write the acknowledge back until it reads as cleared */
> +	i = 0;
> +	cmd.csr = 0;
> +	while (i < MAILBOX_TIMEOUT_MS) {
> +		cmd.ack_trans = 1;
> +		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
> +		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
> +		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
> +		if (!cmd.ack_trans)
> +			break;
> +		i += MAILBOX_POLL_INTERVAL_MS;
> +	}
> +	if (i >= MAILBOX_TIMEOUT_MS)
> +		return -ETIMEDOUT;
> +
> +	return 0;
> +}
> +
> +/*
> + * Read the traffic controller register addr through the command/data
> + * mailbox and store the result in *value.
> + *
> + * The read command is written, the function polls for the acknowledge
> + * bit and fetches the data word, then writes the acknowledge back until
> + * it reads as cleared. Each phase polls every MAILBOX_POLL_INTERVAL_MS
> + * for at most MAILBOX_TIMEOUT_MS.
> + *
> + * Returns 0 on success, -EINVAL when ctx or value is NULL, -ETIMEDOUT
> + * when either phase times out. *value is already updated when only the
> + * second phase times out.
> + */
> +static int he_hssi_indirect_read(struct he_hssi_ctx *ctx, uint32_t addr,
> +	uint32_t *value)
> +{
> +	struct traffic_ctrl_cmd cmd;
> +	struct traffic_ctrl_data data;
> +	uint32_t i = 0;
> +
> +	/* value is dereferenced below, so reject NULL like a missing ctx */
> +	if (!ctx || !value)
> +		return -EINVAL;
> +
> +	cmd.csr = 0;
> +	cmd.read_cmd = 1;
> +	cmd.afu_cmd_addr = addr;
> +	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
> +
> +	/* wait until the command is acknowledged, then fetch the data */
> +	while (i < MAILBOX_TIMEOUT_MS) {
> +		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
> +		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
> +		if (cmd.ack_trans) {
> +			data.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_DATA);
> +			*value = data.read_data;
> +			break;
> +		}
> +		i += MAILBOX_POLL_INTERVAL_MS;
> +	}
> +	if (i >= MAILBOX_TIMEOUT_MS)
> +		return -ETIMEDOUT;
> +
> +	/* write the acknowledge back until it reads as cleared */
> +	i = 0;
> +	cmd.csr = 0;
> +	while (i < MAILBOX_TIMEOUT_MS) {
> +		cmd.ack_trans = 1;
> +		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
> +		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
> +		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
> +		if (!cmd.ack_trans)
> +			break;
> +		i += MAILBOX_POLL_INTERVAL_MS;
> +	}
> +	if (i >= MAILBOX_TIMEOUT_MS)
> +		return -ETIMEDOUT;
> +
> +	AFU_MF_PMD_DEBUG("Indirect read 0x%x, value 0x%08x", addr, *value);
> +	return 0;
> +}
> +
> +/*
> + * Print the traffic monitor counters, the AVST rx error flags and the
> + * loopback FIFO status to stdout. The report stops silently at the
> + * first register that cannot be read.
> + */
> +static void he_hssi_report(struct he_hssi_ctx *ctx)
> +{
> +	uint32_t reg = 0;
> +	uint32_t bytes_hi = 0;
> +	uint64_t bytes = 0;
> +
> +	if (he_hssi_indirect_read(ctx, TM_PKT_GOOD, &reg) != 0)
> +		return;
> +	printf("Number of good packets received: %u\n", reg);
> +
> +	if (he_hssi_indirect_read(ctx, TM_PKT_BAD, &reg) != 0)
> +		return;
> +	printf("Number of bad packets received: %u\n", reg);
> +
> +	/* the byte counter is split over two 32-bit registers, high first */
> +	if (he_hssi_indirect_read(ctx, TM_BYTE_CNT1, &bytes_hi) != 0)
> +		return;
> +	if (he_hssi_indirect_read(ctx, TM_BYTE_CNT0, &reg) != 0)
> +		return;
> +	bytes = ((uint64_t)bytes_hi << 32) | reg;
> +	printf("Number of bytes received: %"PRIu64"\n", bytes);
> +
> +	if (he_hssi_indirect_read(ctx, TM_AVST_RX_ERR, &reg) != 0)
> +		return;
> +	if ((reg & ERR_VALID) != 0) {
> +		printf("AVST rx error:");
> +		if ((reg & OVERFLOW_ERR) != 0)
> +			printf(" overflow");
> +		if ((reg & LENGTH_ERR) != 0)
> +			printf(" length");
> +		if ((reg & OVERSIZE_ERR) != 0)
> +			printf(" oversize");
> +		if ((reg & UNDERSIZE_ERR) != 0)
> +			printf(" undersize");
> +		if ((reg & MAC_CRC_ERR) != 0)
> +			printf(" crc");
> +		if ((reg & PHY_ERR) != 0)
> +			printf(" phy");
> +		printf("\n");
> +	}
> +
> +	if (he_hssi_indirect_read(ctx, LOOPBACK_FIFO_STATUS, &reg) != 0)
> +		return;
> +	if ((reg & (ALMOST_EMPTY | ALMOST_FULL)) == 0)
> +		return;
> +
> +	printf("FIFO status:");
> +	if ((reg & ALMOST_EMPTY) != 0)
> +		printf(" almost empty");
> +	if ((reg & ALMOST_FULL) != 0)
> +		printf(" almost full");
> +	printf("\n");
> +}
> +
> +/*
> + * Run one HE-HSSI test using the configuration stored by he_hssi_config().
> + *
> + * The traffic generator is stopped and the configured port is selected.
> + * When cfg->he_loopback is non-negative only the loopback enable is
> + * programmed and the function returns. Otherwise the traffic generator is
> + * programmed, started, and TG_PKT_XFRD is polled once per second until it
> + * equals cfg->num_packets or cfg->timeout polls have elapsed; the traffic
> + * monitor report is then printed.
> + *
> + * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the device has
> + * no private data, or the error of the first failing indirect access.
> + */
> +static int he_hssi_test(struct afu_mf_rawdev *dev)
> +{
> +	struct he_hssi_priv *priv = NULL;
> +	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
> +	struct he_hssi_ctx *ctx = NULL;
> +	struct traffic_ctrl_ch_sel sel;
> +	uint32_t val = 0;
> +	uint32_t elapsed = 0;
> +	uint32_t i = 0;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_hssi_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	cfg = &priv->he_hssi_cfg;
> +	ctx = &priv->he_hssi_ctx;
> +
> +	ret = he_hssi_indirect_write(ctx, TG_STOP_XFR, 0);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Zero the whole register image first: sel lives on the stack and
> +	 * its reserved bits would otherwise be written with garbage.
> +	 */
> +	sel.csr = 0;
> +	sel.channel_sel = cfg->port;
> +	rte_write64(sel.csr, ctx->addr + TRAFFIC_CTRL_CH_SEL);
> +
> +	if (cfg->he_loopback >= 0) {
> +		val = cfg->he_loopback ? 1 : 0;
> +		AFU_MF_PMD_INFO("%s HE loopback on port %u",
> +			val ? "Enable" : "Disable", cfg->port);
> +		return he_hssi_indirect_write(ctx, LOOPBACK_EN, val);
> +	}
> +
> +	ret = he_hssi_indirect_write(ctx, TG_NUM_PKT, cfg->num_packets);
> +	if (ret)
> +		return ret;
> +
> +	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN, cfg->packet_length);
> +	if (ret)
> +		return ret;
> +
> +	/* MAC addresses are split into a 32-bit low and a 16-bit high part */
> +	val = cfg->src_addr & 0xffffffff;
> +	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_L, val);
> +	if (ret)
> +		return ret;
> +	val = (cfg->src_addr >> 32) & 0xffff;
> +	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_H, val);
> +	if (ret)
> +		return ret;
> +
> +	val = cfg->dest_addr & 0xffffffff;
> +	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_L, val);
> +	if (ret)
> +		return ret;
> +	val = (cfg->dest_addr >> 32) & 0xffff;
> +	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_H, val);
> +	if (ret)
> +		return ret;
> +
> +	val = cfg->random_length ? 1 : 0;
> +	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN_TYPE, val);
> +	if (ret)
> +		return ret;
> +
> +	val = cfg->random_payload ? 1 : 0;
> +	ret = he_hssi_indirect_write(ctx, TG_DATA_PATTERN, val);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < 3; i++) {
> +		ret = he_hssi_indirect_write(ctx, TG_RANDOM_SEED(i),
> +			cfg->rnd_seed[i]);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	ret = he_hssi_indirect_write(ctx, TG_START_XFR, 1);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Use a dedicated counter for the timeout: the seed loop above
> +	 * leaves i at 3, which would cut the wait short by three polls.
> +	 */
> +	while (elapsed++ < cfg->timeout) {
> +		ret = he_hssi_indirect_read(ctx, TG_PKT_XFRD, &val);
> +		if (ret)
> +			break;
> +		if (val == cfg->num_packets)
> +			break;
> +		sleep(1);
> +	}
> +
> +	he_hssi_report(ctx);
> +
> +	return ret;
> +}
> +
> +/*
> + * Prepare the HE-HSSI private data of a device.
> + *
> + * The private area is allocated (zeroed) only if the device does not have
> + * one yet; the register base address is refreshed from dev->addr on every
> + * call. Returns 0, -EINVAL for a NULL device or -ENOMEM.
> + */
> +static int he_hssi_init(struct afu_mf_rawdev *dev)
> +{
> +	struct he_hssi_priv *priv;
> +
> +	if (dev == NULL)
> +		return -EINVAL;
> +
> +	if (dev->priv == NULL) {
> +		dev->priv = rte_zmalloc(NULL, sizeof(struct he_hssi_priv), 0);
> +		if (dev->priv == NULL)
> +			return -ENOMEM;
> +	}
> +
> +	priv = (struct he_hssi_priv *)dev->priv;
> +	priv->he_hssi_ctx.addr = (uint8_t *)dev->addr;
> +
> +	return 0;
> +}
> +
> +/*
> + * Validate a caller supplied HE-HSSI configuration and copy it into the
> + * device private data.
> + *
> + * Returns 0 on success; -EINVAL for a NULL/zero argument, a config_size
> + * other than sizeof(struct rte_pmd_afu_he_hssi_cfg) or a port number not
> + * below NUM_HE_HSSI_PORTS; -ENOENT if the device has no private data.
> + */
> +static int he_hssi_config(struct afu_mf_rawdev *dev, void *config,
> +	size_t config_size)
> +{
> +	struct rte_pmd_afu_he_hssi_cfg *req = config;
> +	struct he_hssi_priv *priv;
> +
> +	if (dev == NULL || req == NULL || config_size == 0)
> +		return -EINVAL;
> +
> +	priv = (struct he_hssi_priv *)dev->priv;
> +	if (priv == NULL)
> +		return -ENOENT;
> +
> +	/* the size is checked before any field of the request is read */
> +	if (config_size != sizeof(*req) || req->port >= NUM_HE_HSSI_PORTS)
> +		return -EINVAL;
> +
> +	rte_memcpy(&priv->he_hssi_cfg, req, sizeof(priv->he_hssi_cfg));
> +
> +	return 0;
> +}
> +
> +/*
> + * Free the HE-HSSI private data and clear the device's pointer to it.
> + * Returns 0, or -EINVAL when dev is NULL.
> + */
> +static int he_hssi_close(struct afu_mf_rawdev *dev)
> +{
> +	if (dev == NULL)
> +		return -EINVAL;
> +
> +	rte_free(dev->priv);
> +	dev->priv = NULL;
> +	return 0;
> +}
> +
> +/*
> + * Write the HE-HSSI register base address to f, or to stdout when f is
> + * NULL. Returns 0, -EINVAL for a NULL device or -ENOENT when the device
> + * has no private data.
> + */
> +static int he_hssi_dump(struct afu_mf_rawdev *dev, FILE *f)
> +{
> +	struct he_hssi_priv *priv;
> +	FILE *out;
> +
> +	if (dev == NULL)
> +		return -EINVAL;
> +
> +	priv = (struct he_hssi_priv *)dev->priv;
> +	if (priv == NULL)
> +		return -ENOENT;
> +
> +	out = (f != NULL) ? f : stdout;
> +	fprintf(out, "addr:\t\t%p\n", (void *)priv->he_hssi_ctx.addr);
> +
> +	return 0;
> +}
> +
> +static struct afu_mf_ops he_hssi_ops = {  /* HE-HSSI callback table */
> +	.init = he_hssi_init,
> +	.config = he_hssi_config,
> +	.start = NULL,  /* no start callback provided */
> +	.stop = NULL,  /* no stop callback provided */
> +	.test = he_hssi_test,
> +	.close = he_hssi_close,
> +	.dump = he_hssi_dump,
> +	.reset = NULL  /* no reset callback provided */
> +};
> +
> +struct afu_mf_drv he_hssi_drv = {  /* binds the HE-HSSI UUID to its callbacks */
> +	.uuid = { HE_HSSI_UUID_L, HE_HSSI_UUID_H },  /* low 64 bits first, then high */
> +	.ops = &he_hssi_ops
> +};
> diff --git a/drivers/raw/afu_mf/he_hssi.h b/drivers/raw/afu_mf/he_hssi.h
> new file mode 100644
> index 0000000..f8b9623
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_hssi.h
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _HE_HSSI_H_
> +#define _HE_HSSI_H_
> +
> +#include "afu_mf_rawdev.h"
> +#include "rte_pmd_afu.h"
> +
> +#define HE_HSSI_UUID_L    0xbb370242ac130002
> +#define HE_HSSI_UUID_H    0x823c334c98bf11ea
> +#define NUM_HE_HSSI_PORTS 8
> +
> +extern struct afu_mf_drv he_hssi_drv;
> +
> +/* HE-HSSI registers definition */
> +#define TRAFFIC_CTRL_CMD    0x30  /* mailbox command register, see struct traffic_ctrl_cmd */
> +#define TRAFFIC_CTRL_DATA   0x38  /* mailbox data register, see struct traffic_ctrl_data */
> +#define TRAFFIC_CTRL_CH_SEL 0x40  /* channel select register, see struct traffic_ctrl_ch_sel */
> +#define AFU_SCRATCHPAD      0x48
> +
> +/* the TG_*, TM_* and LOOPBACK_* values below are addresses passed to the indirect read/write helpers */
> +#define TG_NUM_PKT        0x3c00
> +#define TG_PKT_LEN_TYPE   0x3c01
> +#define TG_DATA_PATTERN   0x3c02
> +#define TG_START_XFR      0x3c03
> +#define TG_STOP_XFR       0x3c04
> +#define TG_SRC_MAC_L      0x3c05
> +#define TG_SRC_MAC_H      0x3c06
> +#define TG_DST_MAC_L      0x3c07
> +#define TG_DST_MAC_H      0x3c08
> +#define TG_PKT_XFRD       0x3c09
> +#define TG_RANDOM_SEED(n) (0x3c0a + (n))  /* the driver uses n = 0..2 */
> +#define TG_PKT_LEN        0x3c0d
> +
> +#define TM_NUM_PKT        0x3d00
> +#define TM_PKT_GOOD       0x3d01
> +#define TM_PKT_BAD        0x3d02
> +#define TM_BYTE_CNT0      0x3d03  /* lower 32 bits of the received byte count */
> +#define TM_BYTE_CNT1      0x3d04  /* upper 32 bits of the received byte count */
> +#define TM_AVST_RX_ERR    0x3d07
> +#define   OVERFLOW_ERR    (1 << 9)
> +#define   LENGTH_ERR      (1 << 8)
> +#define   OVERSIZE_ERR    (1 << 7)
> +#define   UNDERSIZE_ERR   (1 << 6)
> +#define   MAC_CRC_ERR     (1 << 5)
> +#define   PHY_ERR         (1 << 4)
> +#define   ERR_VALID       (1 << 3)  /* the error bits are only reported when this is set */
> +
> +#define LOOPBACK_EN          0x3e00
> +#define LOOPBACK_FIFO_STATUS 0x3e01
> +#define   ALMOST_EMPTY    (1 << 1)
> +#define   ALMOST_FULL     (1 << 0)
> +
> +#define MAILBOX_TIMEOUT_MS       100  /* upper bound for each mailbox polling loop */
> +#define MAILBOX_POLL_INTERVAL_MS 10  /* delay between two mailbox polls */
> +
> +struct traffic_ctrl_cmd {  /* image of the TRAFFIC_CTRL_CMD register */
> +	union {
> +		uint64_t csr;  /* whole register as read/written in one 64-bit access */
> +		struct {
> +			uint32_t read_cmd:1;  /* set to 1 to request an indirect read */
> +			uint32_t write_cmd:1;  /* presumably requests an indirect write - confirm in he_hssi_indirect_write */
> +			uint32_t ack_trans:1;  /* polled until set after a command; then written as 1 and polled until clear */
> +			uint32_t rsvd1:29;
> +			uint32_t afu_cmd_addr:16;  /* address of the indirectly accessed register */
> +			uint32_t rsvd2:16;
> +		};
> +	};
> +};
> +
> +struct traffic_ctrl_data {  /* image of the TRAFFIC_CTRL_DATA register */
> +	union {
> +		uint64_t csr;  /* whole register as one 64-bit value */
> +		struct {
> +			uint32_t read_data;  /* value returned by an indirect read */
> +			uint32_t write_data;  /* presumably the value for an indirect write - confirm in he_hssi_indirect_write */
> +		};
> +	};
> +};
> +
> +struct traffic_ctrl_ch_sel {  /* image of the TRAFFIC_CTRL_CH_SEL register */
> +	union {
> +		uint64_t csr;  /* whole register as one 64-bit value */
> +		struct {
> +			uint32_t channel_sel:3;  /* loaded with the configured port number */
> +			uint32_t rsvd1:29;
> +			uint32_t rsvd2;
> +		};
> +	};
> +};
> +
> +struct he_hssi_ctx {  /* HE-HSSI runtime state */
> +	uint8_t *addr;  /* register base, copied from dev->addr by he_hssi_init() */
> +};
> +
> +struct he_hssi_priv {  /* per-device private data hung off dev->priv */
> +	struct rte_pmd_afu_he_hssi_cfg he_hssi_cfg;  /* copy of the last accepted configuration */
> +	struct he_hssi_ctx he_hssi_ctx;
> +};
> +
> +#endif /* _HE_HSSI_H_ */
> diff --git a/drivers/raw/afu_mf/he_lbk.c b/drivers/raw/afu_mf/he_lbk.c
> new file mode 100644
> index 0000000..8735647
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_lbk.c
> @@ -0,0 +1,427 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memcpy.h>
> +#include <rte_io.h>
> +#include <rte_vfio.h>
> +#include <rte_bus_pci.h>
> +#include <rte_bus_ifpga.h>
> +#include <rte_rawdev.h>
> +
> +#include "afu_mf_rawdev.h"
> +#include "he_lbk.h"
> +
> +/*
> + * Build the CSR_CFG value from the stored HE-LBK configuration and write
> + * it to the device.
> + *
> + * Returns 0, -EINVAL for a NULL device or -ENOENT when the device has no
> + * private data.
> + */
> +static int he_lbk_afu_config(struct afu_mf_rawdev *dev)
> +{
> +	struct he_lbk_priv *priv;
> +	struct rte_pmd_afu_he_lbk_cfg *cfg;
> +	struct he_lbk_csr_cfg reg;
> +
> +	if (dev == NULL)
> +		return -EINVAL;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	if (priv == NULL)
> +		return -ENOENT;
> +
> +	cfg = &priv->he_lbk_cfg;
> +
> +	reg.csr = 0;
> +	reg.cont = cfg->cont ? 1 : 0;
> +	reg.mode = cfg->mode;
> +	reg.trput_interleave = cfg->trput_interleave;
> +	/* a burst of 4 cache lines is encoded as 2, otherwise as length - 1 */
> +	reg.multicl_len = (cfg->multi_cl == 4) ? 2 : cfg->multi_cl - 1;
> +
> +	AFU_MF_PMD_DEBUG("cfg: 0x%08x", reg.csr);
> +	rte_write32(reg.csr, priv->he_lbk_ctx.addr + CSR_CFG);
> +
> +	return 0;
> +}
> +
> +/*
> + * Print the result of one HE-LBK run for a transfer of cl cache lines.
> + *
> + * Read/write counts come from CSR_STATUS0, pending counts from
> + * CSR_STATUS1 and the clock count from the DSM status block. When no AFU
> + * clock frequency is configured it is taken from CSR_HE_INFO0, falling
> + * back to 350 MHz, and stored in the configuration. Does nothing if dev
> + * or its private data is NULL.
> + */
> +static void he_lbk_report(struct afu_mf_rawdev *dev, uint32_t cl)
> +{
> +	struct he_lbk_priv *priv;
> +	struct rte_pmd_afu_he_lbk_cfg *cfg;
> +	struct he_lbk_ctx *ctx;
> +	struct he_lbk_dsm_status *dsm;
> +	struct he_lbk_status0 counts;
> +	struct he_lbk_status1 pending;
> +	uint64_t msg;
> +	uint64_t clocks;
> +	uint64_t he_info;
> +	uint32_t overhead;
> +	double lines, rd_bw, wr_bw;
> +
> +	if (dev == NULL || dev->priv == NULL)
> +		return;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	cfg = &priv->he_lbk_cfg;
> +	ctx = &priv->he_lbk_ctx;
> +	dsm = ctx->status_ptr;
> +
> +	msg = rte_read64(ctx->addr + CSR_SWTEST_MSG);
> +	counts.csr = rte_read64(ctx->addr + CSR_STATUS0);
> +	pending.csr = rte_read64(ctx->addr + CSR_STATUS1);
> +
> +	/* the end overhead is only subtracted for non-continuous runs */
> +	overhead = dsm->start_overhead;
> +	if (!cfg->cont)
> +		overhead += dsm->end_overhead;
> +	clocks = dsm->num_clocks - overhead;
> +
> +	if (cfg->freq_mhz == 0) {
> +		he_info = rte_read64(ctx->addr + CSR_HE_INFO0);
> +		AFU_MF_PMD_INFO("API version: %"PRIx64, he_info >> 16);
> +		cfg->freq_mhz = he_info & 0xffff;
> +		if (cfg->freq_mhz == 0) {
> +			AFU_MF_PMD_INFO("Frequency of AFU clock is unknown."
> +				" Assuming 350 MHz.");
> +			cfg->freq_mhz = 350;
> +		}
> +	}
> +
> +	lines = (double)counts.num_reads;
> +	rd_bw = (lines * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / clocks;
> +	lines = (double)counts.num_writes;
> +	wr_bw = (lines * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / clocks;
> +
> +	printf("Cachelines  Read_Count Write_Count Pend_Read Pend_Write "
> +		"Clocks@%uMHz   Rd_Bandwidth   Wr_Bandwidth\n",
> +		cfg->freq_mhz);
> +	printf("%10u  %10u %10u %10u %10u  %12"PRIu64
> +		"   %7.3f GB/s   %7.3f GB/s\n",
> +		cl, counts.num_reads, counts.num_writes,
> +		pending.num_pend_reads, pending.num_pend_writes,
> +		clocks, rd_bw / 1e9, wr_bw / 1e9);
> +	printf("Test Message: 0x%"PRIx64"\n", msg);
> +}
> +
> +/*
> + * Run the HE-LBK test for every transfer size from cfg->begin to cfg->end
> + * cache lines, in steps of cfg->multi_cl.
> + *
> + * For each size the destination and DSM buffers are cleared, the AFU is
> + * reset and started, completion is awaited through the DSM status block,
> + * a report is printed and, in loopback mode, the destination buffer is
> + * compared with the source pattern (a mismatch is only logged).
> + *
> + * Returns 0 on success, -EINVAL for a NULL device, -ENOENT when the
> + * device has no private data, the error of he_lbk_afu_config(), or the
> + * error returned by dsm_poll_timeout() when a run does not complete.
> + */
> +static int he_lbk_test(struct afu_mf_rawdev *dev)
> +{
> +	struct he_lbk_priv *priv = NULL;
> +	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
> +	struct he_lbk_ctx *ctx = NULL;
> +	struct he_lbk_csr_ctl ctl;
> +	uint32_t *ptr = NULL;
> +	uint32_t i, j, cl, val = 0;
> +	uint64_t sval = 0;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	cfg = &priv->he_lbk_cfg;
> +	ctx = &priv->he_lbk_ctx;
> +
> +	/* pulse the reset bit: write 0, wait, then write 1 */
> +	ctl.csr = 0;
> +	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +	rte_delay_us(1000);
> +	ctl.reset = 1;
> +	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +	/* initialize DMA addresses */
> +	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
> +	rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
> +
> +	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
> +	rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
> +
> +	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
> +	rte_write32(SIZE_TO_CLS(ctx->dsm_iova), ctx->addr + CSR_AFU_DSM_BASEL);
> +	rte_write32(SIZE_TO_CLS(ctx->dsm_iova) >> 32,
> +		ctx->addr + CSR_AFU_DSM_BASEH);
> +
> +	ret = he_lbk_afu_config(dev);
> +	if (ret)
> +		return ret;
> +
> +	/* initialize src data: each 32-bit word holds its own index */
> +	ptr = (uint32_t *)ctx->src_ptr;
> +	j = CLS_TO_SIZE(cfg->end) >> 2;
> +	for (i = 0; i < j; i++)
> +		*ptr++ = i;
> +
> +	/* start test */
> +	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
> +		memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
> +		memset(ctx->dsm_ptr, 0, DSM_SIZE);
> +
> +		ctl.csr = 0;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +		rte_delay_us(1000);
> +		ctl.reset = 1;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +		rte_write32(cl - 1, ctx->addr + CSR_NUM_LINES);
> +
> +		ctl.start = 1;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +		if (cfg->cont) {
> +			/* continuous mode: run for the timeout, then stop */
> +			rte_delay_ms(cfg->timeout * 1000);
> +			ctl.force_completion = 1;
> +			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
> +				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
> +				DSM_TIMEOUT);
> +			if (ret) {
> +				printf("DSM poll timeout\n");
> +				goto end;
> +			}
> +		} else {
> +			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
> +				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
> +				DSM_TIMEOUT);
> +			if (ret) {
> +				printf("DSM poll timeout\n");
> +				goto end;
> +			}
> +			ctl.force_completion = 1;
> +			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +		}
> +
> +		he_lbk_report(dev, cl);
> +
> +		/* wait up to ~100 ms for CSR_STATUS1 to read back as zero */
> +		i = 0;
> +		while (i++ < 100) {
> +			sval = rte_read64(ctx->addr + CSR_STATUS1);
> +			if (sval == 0)
> +				break;
> +			rte_delay_us(1000);
> +		}
> +
> +		if (cfg->mode == NLB_MODE_LPBK) {
> +			ptr = (uint32_t *)ctx->dest_ptr;
> +			j = CLS_TO_SIZE(cl) >> 2;
> +			for (i = 0; i < j; i++) {
> +				if (*ptr++ != i) {
> +					AFU_MF_PMD_ERR("Data mismatch @ %u", i);
> +					break;
> +				}
> +			}
> +		}
> +	}
> +
> +end:
> +	/* propagate a DSM poll failure instead of always reporting success */
> +	return ret;
> +}
> +
> +/*
> + * Free the DSM, source and destination buffers of the HE-LBK context and
> + * reset the pointers to them (including status_ptr, which points into the
> + * DSM buffer).
> + *
> + * Returns 0, -EINVAL for a NULL device or -ENOENT when the device has no
> + * private data.
> + */
> +static int he_lbk_ctx_release(struct afu_mf_rawdev *dev)
> +{
> +	struct he_lbk_ctx *ctx;
> +
> +	if (dev == NULL)
> +		return -EINVAL;
> +	if (dev->priv == NULL)
> +		return -ENOENT;
> +
> +	ctx = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
> +
> +	rte_free(ctx->dsm_ptr);
> +	rte_free(ctx->src_ptr);
> +	rte_free(ctx->dest_ptr);
> +
> +	ctx->status_ptr = NULL;
> +	ctx->dsm_ptr = NULL;
> +	ctx->src_ptr = NULL;
> +	ctx->dest_ptr = NULL;
> +
> +	return 0;
> +}
> +
> +/*
> + * Allocate one zeroed buffer of the given size, aligned to TEST_MEM_ALIGN,
> + * and look up its IO address. On a failed IOVA lookup *ptr stays set so
> + * that the caller's release path frees it.
> + */
> +static int he_lbk_alloc_buf(size_t size, uint8_t **ptr, uint64_t *iova)
> +{
> +	*ptr = (uint8_t *)rte_zmalloc(NULL, size, TEST_MEM_ALIGN);
> +	if (!*ptr)
> +		return -ENOMEM;
> +
> +	*iova = rte_malloc_virt2iova(*ptr);
> +	if (*iova == RTE_BAD_IOVA)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +/*
> + * Set up the HE-LBK context: record the register base address and
> + * allocate the DSM, source and destination buffers with their IOVAs.
> + *
> + * Buffers left from an earlier initialization are released first, so
> + * calling this again on the same device does not leak them. On failure
> + * everything allocated so far is released.
> + *
> + * Returns 0, -EINVAL for a NULL device, -ENOENT when the device has no
> + * private data, or -ENOMEM when an allocation or IOVA lookup fails.
> + */
> +static int he_lbk_ctx_init(struct afu_mf_rawdev *dev)
> +{
> +	struct he_lbk_priv *priv = NULL;
> +	struct he_lbk_ctx *ctx = NULL;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	/* no-op on a fresh (zeroed) context, frees stale buffers otherwise */
> +	he_lbk_ctx_release(dev);
> +
> +	ctx = &priv->he_lbk_ctx;
> +	ctx->addr = (uint8_t *)dev->addr;
> +
> +	ret = he_lbk_alloc_buf(DSM_SIZE, &ctx->dsm_ptr, &ctx->dsm_iova);
> +	if (ret)
> +		goto release;
> +
> +	ret = he_lbk_alloc_buf(NLB_BUF_SIZE, &ctx->src_ptr, &ctx->src_iova);
> +	if (ret)
> +		goto release;
> +
> +	ret = he_lbk_alloc_buf(NLB_BUF_SIZE, &ctx->dest_ptr, &ctx->dest_iova);
> +	if (ret)
> +		goto release;
> +
> +	/* the status block sits at the start of the DSM buffer */
> +	ctx->status_ptr = (struct he_lbk_dsm_status *)ctx->dsm_ptr;
> +	return 0;
> +
> +release:
> +	he_lbk_ctx_release(dev);
> +	return ret;
> +}
> +
> +/*
> + * Allocate the HE-LBK private data if the device has none yet, then
> + * initialize the context with he_lbk_ctx_init().
> + *
> + * Returns -EINVAL for a NULL device, -ENOMEM when the private data cannot
> + * be allocated, otherwise the result of he_lbk_ctx_init().
> + */
> +static int he_lbk_init(struct afu_mf_rawdev *dev)
> +{
> +	void *priv;
> +
> +	if (dev == NULL)
> +		return -EINVAL;
> +
> +	if (dev->priv == NULL) {
> +		priv = rte_zmalloc(NULL, sizeof(struct he_lbk_priv), 0);
> +		dev->priv = priv;
> +		if (priv == NULL)
> +			return -ENOMEM;
> +	}
> +
> +	return he_lbk_ctx_init(dev);
> +}
> +
> +/*
> + * Validate a caller supplied HE-LBK configuration and copy it into the
> + * device private data.
> + *
> + * Rejected with -EINVAL: NULL/zero arguments, a config_size other than
> + * sizeof(struct rte_pmd_afu_he_lbk_cfg), a mode above NLB_MODE_TRPUT, a
> + * multi_cl other than 1, 2 or 4, a begin outside
> + * [MIN_CACHE_LINES, MAX_CACHE_LINES] and an end outside
> + * [begin, MAX_CACHE_LINES]. Returns -ENOENT when the device has no
> + * private data and 0 on success.
> + */
> +static int he_lbk_config(struct afu_mf_rawdev *dev, void *config,
> +	size_t config_size)
> +{
> +	struct rte_pmd_afu_he_lbk_cfg *req = config;
> +	struct he_lbk_priv *priv;
> +
> +	if (dev == NULL || req == NULL || config_size == 0)
> +		return -EINVAL;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	if (priv == NULL)
> +		return -ENOENT;
> +
> +	if (config_size != sizeof(*req))
> +		return -EINVAL;
> +
> +	if (req->mode > NLB_MODE_TRPUT)
> +		return -EINVAL;
> +
> +	switch (req->multi_cl) {
> +	case 1:
> +	case 2:
> +	case 4:
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (req->begin < MIN_CACHE_LINES || req->begin > MAX_CACHE_LINES)
> +		return -EINVAL;
> +	if (req->end < req->begin || req->end > MAX_CACHE_LINES)
> +		return -EINVAL;
> +
> +	rte_memcpy(&priv->he_lbk_cfg, req, sizeof(priv->he_lbk_cfg));
> +
> +	return 0;
> +}
> +
> +/*
> + * Release the HE-LBK buffers, free the private data and clear the
> + * device's pointer to it. Returns 0, or -EINVAL when dev is NULL.
> + */
> +static int he_lbk_close(struct afu_mf_rawdev *dev)
> +{
> +	if (dev == NULL)
> +		return -EINVAL;
> +
> +	/* the result is deliberately ignored, as there may be no context */
> +	(void)he_lbk_ctx_release(dev);
> +
> +	rte_free(dev->priv);
> +	dev->priv = NULL;
> +	return 0;
> +}
> +
> +/*
> + * Write the HE-LBK context (register base, buffer pointers and their
> + * IOVAs) to f, or to stdout when f is NULL.
> + *
> + * Returns 0, -EINVAL for a NULL device or -ENOENT when the device has no
> + * private data.
> + */
> +static int he_lbk_dump(struct afu_mf_rawdev *dev, FILE *f)
> +{
> +	struct he_lbk_ctx *ctx;
> +	FILE *out;
> +
> +	if (dev == NULL)
> +		return -EINVAL;
> +	if (dev->priv == NULL)
> +		return -ENOENT;
> +
> +	ctx = &((struct he_lbk_priv *)dev->priv)->he_lbk_ctx;
> +	out = (f != NULL) ? f : stdout;
> +
> +	fprintf(out, "addr:\t\t%p\n", (void *)ctx->addr);
> +	fprintf(out, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
> +	fprintf(out, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
> +	fprintf(out, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
> +	fprintf(out, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
> +	fprintf(out, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
> +	fprintf(out, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
> +	fprintf(out, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
> +
> +	return 0;
> +}
> +
> +static struct afu_mf_ops he_lbk_ops = {  /* callback table shared by he_lbk_drv and he_mem_lbk_drv */
> +	.init = he_lbk_init,
> +	.config = he_lbk_config,
> +	.start = NULL,  /* no start callback provided */
> +	.stop = NULL,  /* no stop callback provided */
> +	.test = he_lbk_test,
> +	.close = he_lbk_close,
> +	.dump = he_lbk_dump,
> +	.reset = NULL  /* no reset callback provided */
> +};
> +
> +struct afu_mf_drv he_lbk_drv = {  /* binds the HE-LBK UUID to the loopback callbacks */
> +	.uuid = { HE_LBK_UUID_L, HE_LBK_UUID_H },  /* low 64 bits first, then high */
> +	.ops = &he_lbk_ops
> +};
> +
> +struct afu_mf_drv he_mem_lbk_drv = {  /* HE-MEM-LBK UUID, served by the same callbacks as HE-LBK */
> +	.uuid = { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },  /* low 64 bits first, then high */
> +	.ops = &he_lbk_ops
> +};
> diff --git a/drivers/raw/afu_mf/he_lbk.h b/drivers/raw/afu_mf/he_lbk.h
> new file mode 100644
> index 0000000..c2e8a29
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_lbk.h
> @@ -0,0 +1,121 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _HE_LBK_H_
> +#define _HE_LBK_H_
> +
> +#include "afu_mf_rawdev.h"
> +#include "rte_pmd_afu.h"
> +
> +#define HE_LBK_UUID_L      0xb94b12284c31e02b
> +#define HE_LBK_UUID_H      0x56e203e9864f49a7
> +#define HE_MEM_LBK_UUID_L  0xbb652a578330a8eb
> +#define HE_MEM_LBK_UUID_H  0x8568ab4e6ba54616
> +
> +extern struct afu_mf_drv he_lbk_drv;
> +extern struct afu_mf_drv he_mem_lbk_drv;
> +
> +/* HE-LBK & HE-MEM-LBK registers definition */
> +#define CSR_SCRATCHPAD0    0x100
> +#define CSR_SCRATCHPAD1    0x108
> +#define CSR_AFU_DSM_BASEL  0x110  /* low 32 bits of the DSM address, in cache-line units */
> +#define CSR_AFU_DSM_BASEH  0x114  /* high 32 bits of the DSM address, in cache-line units */
> +#define CSR_SRC_ADDR       0x120  /* source buffer address, in cache-line units */
> +#define CSR_DST_ADDR       0x128  /* destination buffer address, in cache-line units */
> +#define CSR_NUM_LINES      0x130  /* HE-LBK writes the number of cache lines minus one */
> +#define CSR_CTL            0x138  /* see struct he_lbk_csr_ctl */
> +#define CSR_CFG            0x140  /* see struct he_lbk_csr_cfg */
> +#define CSR_INACT_THRESH   0x148
> +#define CSR_INTERRUPT0     0x150
> +#define CSR_SWTEST_MSG     0x158
> +#define CSR_STATUS0        0x160  /* see struct he_lbk_status0 */
> +#define CSR_STATUS1        0x168  /* see struct he_lbk_status1 */
> +#define CSR_ERROR          0x170
> +#define CSR_STRIDE         0x178
> +#define CSR_HE_INFO0       0x180  /* bits 15:0 clock frequency in MHz, upper bits API version */
> +
> +#define DSM_SIZE           0x200000  /* size in bytes of the DSM buffer */
> +#define DSM_POLL_INTERVAL  5  /* ms */
> +#define DSM_TIMEOUT        1000  /* ms */
> +
> +#define NLB_BUF_SIZE  0x400000  /* size in bytes of the source and of the destination buffer */
> +#define TEST_MEM_ALIGN  1024  /* alignment requested for all test buffers */
> +
> +struct he_lbk_csr_ctl {  /* image of the CSR_CTL register */
> +	union {
> +		uint32_t csr;  /* whole register as one 32-bit value */
> +		struct {
> +			uint32_t reset:1;  /* the test writes 0 and then 1 before each run */
> +			uint32_t start:1;  /* set to 1 to start a run */
> +			uint32_t force_completion:1;  /* set to 1 to end a run */
> +			uint32_t reserved:29;
> +		};
> +	};
> +};
> +
> +struct he_lbk_csr_cfg {  /* image of the CSR_CFG register */
> +	union {
> +		uint32_t csr;  /* whole register as one 32-bit value */
> +		struct {
> +			uint32_t rsvd1:1;
> +			uint32_t cont:1;  /* set when continuous mode is configured */
> +			uint32_t mode:3;  /* test mode, copied from the configuration */
> +			uint32_t multicl_len:2;  /* encoded burst length: 2 for 4 lines, else multi_cl - 1 */
> +			uint32_t rsvd2:13;
> +			uint32_t trput_interleave:3;  /* copied from the configuration */
> +			uint32_t test_cfg:5;  /* left at 0 by this driver */
> +			uint32_t interrupt_on_error:1;  /* left at 0 by this driver */
> +			uint32_t interrupt_testmode:1;  /* left at 0 by this driver */
> +			uint32_t rsvd3:2;
> +		};
> +	};
> +};
> +
> +struct he_lbk_status0 {  /* image of the CSR_STATUS0 register */
> +	union {
> +		uint64_t csr;  /* whole register as one 64-bit value */
> +		struct {
> +			uint32_t num_writes;  /* write count used for the bandwidth report */
> +			uint32_t num_reads;  /* read count used for the bandwidth report */
> +		};
> +	};
> +};
> +
> +struct he_lbk_status1 {  /* image of the CSR_STATUS1 register */
> +	union {
> +		uint64_t csr;  /* whole register; the test waits for it to read as 0 */
> +		struct {
> +			uint32_t num_pend_writes;  /* printed as Pend_Write */
> +			uint32_t num_pend_reads;  /* printed as Pend_Read */
> +		};
> +	};
> +};
> +
> +struct he_lbk_dsm_status {  /* status block read from the start of the DSM buffer */
> +	uint32_t test_complete;  /* bit 0 is polled until it reads 1 */
> +	uint32_t test_error;
> +	uint64_t num_clocks;  /* clock count of the run, including overheads */
> +	uint32_t num_reads;
> +	uint32_t num_writes;
> +	uint32_t start_overhead;  /* subtracted from num_clocks */
> +	uint32_t end_overhead;  /* also subtracted, but only for non-continuous runs */
> +};
> +
> +struct he_lbk_ctx {  /* HE-LBK runtime state */
> +	uint8_t *addr;  /* register base, copied from dev->addr */
> +	uint8_t *dsm_ptr;  /* DSM buffer of DSM_SIZE bytes */
> +	uint64_t dsm_iova;  /* IO address of dsm_ptr */
> +	uint8_t *src_ptr;  /* source buffer of NLB_BUF_SIZE bytes */
> +	uint64_t src_iova;  /* IO address of src_ptr */
> +	uint8_t *dest_ptr;  /* destination buffer of NLB_BUF_SIZE bytes */
> +	uint64_t dest_iova;  /* IO address of dest_ptr */
> +	struct he_lbk_dsm_status *status_ptr;  /* alias of dsm_ptr, not a separate allocation */
> +};
> +
> +struct he_lbk_priv {  /* per-device private data hung off dev->priv */
> +	struct rte_pmd_afu_he_lbk_cfg he_lbk_cfg;  /* copy of the last accepted configuration */
> +	struct he_lbk_ctx he_lbk_ctx;
> +};
> +
> +#endif /* _HE_LBK_H_ */
> diff --git a/drivers/raw/afu_mf/he_mem.c b/drivers/raw/afu_mf/he_mem.c
> new file mode 100644
> index 0000000..ccbb3a8
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_mem.c
> @@ -0,0 +1,181 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memcpy.h>
> +#include <rte_io.h>
> +#include <rte_vfio.h>
> +#include <rte_bus_pci.h>
> +#include <rte_bus_ifpga.h>
> +#include <rte_rawdev.h>
> +
> +#include "afu_mf_rawdev.h"
> +#include "he_mem.h"
> +
> +/*
> + * Run the memory traffic generator test.
> + *
> + * The scratchpad register is first checked with a write/read-back. The
> + * configured channel mask, limited to the bits read from MEM_TG_CTRL, is
> + * then written to MEM_TG_CTRL and MEM_TG_STAT is polled until every
> + * selected channel is no longer active or MEM_TG_TIMEOUT_MS has elapsed.
> + * The result of each finished channel is printed.
> + *
> + * Returns 0 when all selected channels finished, -EINVAL for a NULL
> + * device, -ENOENT when the device has no private data, -EIO when the
> + * scratchpad check fails, or the mask of the channels still running at
> + * the timeout.
> + */
> +static int he_mem_tg_test(struct afu_mf_rawdev *dev)
> +{
> +	struct he_mem_tg_priv *priv;
> +	struct rte_pmd_afu_he_mem_tg_cfg *cfg;
> +	struct he_mem_tg_ctx *ctx;
> +	const char *result;
> +	uint64_t value = 0x12345678;
> +	uint64_t cap;
> +	uint64_t channel_mask;
> +	int i, t;
> +
> +	if (dev == NULL)
> +		return -EINVAL;
> +
> +	priv = (struct he_mem_tg_priv *)dev->priv;
> +	if (priv == NULL)
> +		return -ENOENT;
> +
> +	cfg = &priv->he_mem_tg_cfg;
> +	ctx = &priv->he_mem_tg_ctx;
> +
> +	AFU_MF_PMD_DEBUG("Channel mask: 0x%x", cfg->channel_mask);
> +
> +	rte_write64(value, ctx->addr + MEM_TG_SCRATCHPAD);
> +	cap = rte_read64(ctx->addr + MEM_TG_SCRATCHPAD);
> +	AFU_MF_PMD_DEBUG("Scratchpad value: 0x%"PRIx64, cap);
> +	if (cap != value) {
> +		AFU_MF_PMD_ERR("Test scratchpad register failed");
> +		return -EIO;
> +	}
> +
> +	cap = rte_read64(ctx->addr + MEM_TG_CTRL);
> +	AFU_MF_PMD_DEBUG("Capability: 0x%"PRIx64, cap);
> +
> +	/* start traffic generators */
> +	channel_mask = cfg->channel_mask & cap;
> +	rte_write64(channel_mask, ctx->addr + MEM_TG_CTRL);
> +
> +	/* check test status */
> +	for (t = 0; t < MEM_TG_TIMEOUT_MS; t += MEM_TG_POLL_INTERVAL_MS) {
> +		value = rte_read64(ctx->addr + MEM_TG_STAT);
> +		for (i = 0; i < NUM_MEM_TG_CHANNELS; i++) {
> +			if (!(channel_mask & (1 << i)))
> +				continue;
> +			if (TGACTIVE(value, i))
> +				continue;
> +
> +			if (TGPASS(value, i))
> +				result = "pass";
> +			else if (TGTIMEOUT(value, i))
> +				result = "timeout";
> +			else if (TGFAIL(value, i))
> +				result = "fail";
> +			else
> +				result = "error";
> +			printf("TG channel %d test %s\n", i, result);
> +
> +			/* this channel is done, stop watching it */
> +			channel_mask &= ~(1 << i);
> +		}
> +		if (channel_mask == 0)
> +			break;
> +		rte_delay_ms(MEM_TG_POLL_INTERVAL_MS);
> +	}
> +
> +	if (channel_mask != 0) {
> +		AFU_MF_PMD_ERR("Timeout 0x%04lx", (unsigned long)value);
> +		return channel_mask;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Prepare the MEM-TG private data of a device.
> + *
> + * The private area is allocated (zeroed) only if the device does not have
> + * one yet; the register base address is refreshed from dev->addr on every
> + * call. Returns 0, -EINVAL for a NULL device or -ENOMEM.
> + */
> +static int he_mem_tg_init(struct afu_mf_rawdev *dev)
> +{
> +	struct he_mem_tg_priv *priv;
> +
> +	if (dev == NULL)
> +		return -EINVAL;
> +
> +	if (dev->priv == NULL) {
> +		dev->priv = rte_zmalloc(NULL, sizeof(struct he_mem_tg_priv), 0);
> +		if (dev->priv == NULL)
> +			return -ENOMEM;
> +	}
> +
> +	priv = (struct he_mem_tg_priv *)dev->priv;
> +	priv->he_mem_tg_ctx.addr = (uint8_t *)dev->addr;
> +
> +	return 0;
> +}
> +
> +/*
> + * Copy a caller supplied MEM-TG configuration into the device private
> + * data.
> + *
> + * Returns 0 on success; -EINVAL for a NULL/zero argument or a config_size
> + * other than sizeof(struct rte_pmd_afu_he_mem_tg_cfg); -ENOENT when the
> + * device has no private data.
> + */
> +static int he_mem_tg_config(struct afu_mf_rawdev *dev, void *config,
> +	size_t config_size)
> +{
> +	struct he_mem_tg_priv *priv;
> +
> +	if (dev == NULL || config == NULL || config_size == 0)
> +		return -EINVAL;
> +
> +	priv = (struct he_mem_tg_priv *)dev->priv;
> +	if (priv == NULL)
> +		return -ENOENT;
> +
> +	if (config_size != sizeof(priv->he_mem_tg_cfg))
> +		return -EINVAL;
> +
> +	rte_memcpy(&priv->he_mem_tg_cfg, config, sizeof(priv->he_mem_tg_cfg));
> +
> +	return 0;
> +}
> +
> +/*
> + * Free the MEM-TG private data and clear the device's pointer to it.
> + * Returns 0, or -EINVAL when dev is NULL.
> + */
> +static int he_mem_tg_close(struct afu_mf_rawdev *dev)
> +{
> +	if (dev == NULL)
> +		return -EINVAL;
> +
> +	rte_free(dev->priv);
> +	dev->priv = NULL;
> +	return 0;
> +}
> +
> +/*
> + * Write the MEM-TG register base address to f, or to stdout when f is
> + * NULL. Returns 0, -EINVAL for a NULL device or -ENOENT when the device
> + * has no private data.
> + */
> +static int he_mem_tg_dump(struct afu_mf_rawdev *dev, FILE *f)
> +{
> +	struct he_mem_tg_priv *priv;
> +	FILE *out;
> +
> +	if (dev == NULL)
> +		return -EINVAL;
> +
> +	priv = (struct he_mem_tg_priv *)dev->priv;
> +	if (priv == NULL)
> +		return -ENOENT;
> +
> +	out = (f != NULL) ? f : stdout;
> +	fprintf(out, "addr:\t\t%p\n", (void *)priv->he_mem_tg_ctx.addr);
> +
> +	return 0;
> +}
> +
> +static struct afu_mf_ops he_mem_tg_ops = {  /* MEM-TG callback table */
> +	.init = he_mem_tg_init,
> +	.config = he_mem_tg_config,
> +	.start = NULL,  /* no start callback provided */
> +	.stop = NULL,  /* no stop callback provided */
> +	.test = he_mem_tg_test,
> +	.close = he_mem_tg_close,
> +	.dump = he_mem_tg_dump,
> +	.reset = NULL  /* no reset callback provided */
> +};
> +
> +struct afu_mf_drv he_mem_tg_drv = {  /* binds the MEM-TG UUID to its callbacks */
> +	.uuid = { HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },  /* low 64 bits first, then high */
> +	.ops = &he_mem_tg_ops
> +};
> diff --git a/drivers/raw/afu_mf/he_mem.h
> b/drivers/raw/afu_mf/he_mem.h
> new file mode 100644
> index 0000000..82404b6
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_mem.h
> @@ -0,0 +1,40 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _HE_MEM_H_
> +#define _HE_MEM_H_
> +
> +#include "afu_mf_rawdev.h"
> +#include "rte_pmd_afu.h"
> +
> +#define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
> +#define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
> +
> +#define NUM_MEM_TG_CHANNELS      4
> +#define MEM_TG_TIMEOUT_MS     5000
> +#define MEM_TG_POLL_INTERVAL_MS 10
> +
> +extern struct afu_mf_drv he_mem_tg_drv;
> +
> +/*
> + * MEM-TG registers definition
> + *
> + * MEM_TG_STAT holds one 4-bit status field per channel; channel n starts
> + * at bit (n * 4). Within a field, bit 0 is "active", bit 1 "timeout",
> + * bit 2 "fail" and bit 3 "pass". The channel argument is parenthesized in
> + * every macro so that an expression such as (i + 1) is shifted as a whole.
> + */
> +#define MEM_TG_SCRATCHPAD   0x28
> +#define MEM_TG_CTRL         0x30
> +#define   TGCONTROL(n)      (1 << (n))
> +#define MEM_TG_STAT         0x38
> +#define   TGSTATUS(v, n)    (((v) >> ((n) << 2)) & 0xf)
> +#define   TGPASS(v, n)      (((v) >> (((n) << 2) + 3)) & 0x1)
> +#define   TGFAIL(v, n)      (((v) >> (((n) << 2) + 2)) & 0x1)
> +#define   TGTIMEOUT(v, n)   (((v) >> (((n) << 2) + 1)) & 0x1)
> +#define   TGACTIVE(v, n)    (((v) >> ((n) << 2)) & 0x1)
> +
> +struct he_mem_tg_ctx {  /* MEM-TG runtime state */
> +	uint8_t *addr;  /* register base, copied from dev->addr by he_mem_tg_init() */
> +};
> +
> +struct he_mem_tg_priv {  /* per-device private data hung off dev->priv */
> +	struct rte_pmd_afu_he_mem_tg_cfg he_mem_tg_cfg;  /* copy of the last accepted configuration */
> +	struct he_mem_tg_ctx he_mem_tg_ctx;
> +};
> +
> +#endif /* _HE_MEM_H_ */
> diff --git a/drivers/raw/afu_mf/meson.build
> b/drivers/raw/afu_mf/meson.build
> new file mode 100644
> index 0000000..f304bc8
> --- /dev/null
> +++ b/drivers/raw/afu_mf/meson.build
> @@ -0,0 +1,8 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright 2022 Intel Corporation
> +
> +deps += ['rawdev', 'bus_pci', 'bus_ifpga']  # components this driver is built against
> +sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c', 'he_mem.c',
> +	'he_hssi.c')  # one source file per supported AFU plus the common rawdev code
> +
> +headers = files('rte_pmd_afu.h')  # presumably the public header exported to applications - confirm
> diff --git a/drivers/raw/afu_mf/n3000_afu.c
> b/drivers/raw/afu_mf/n3000_afu.c
> new file mode 100644
> index 0000000..19d7c54
> --- /dev/null
> +++ b/drivers/raw/afu_mf/n3000_afu.c
> @@ -0,0 +1,2005 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memcpy.h>
> +#include <rte_io.h>
> +#include <rte_vfio.h>
> +#include <rte_bus_pci.h>
> +#include <rte_bus_ifpga.h>
> +#include <rte_rawdev.h>
> +
> +#include "afu_mf_rawdev.h"
> +#include "n3000_afu.h"
> +
> +/*
> + * Build the NLB CSR_CFG value from the stored configuration and write it
> + * to the device.
> + *
> + * The write cache policy is encoded either through wrpush_i (for
> + * NLB_WRPUSH_I) or through wrthru_en (any other policy), never both; the
> + * mixed read hint is encoded as rdsel = 3.
> + *
> + * Returns 0, -EINVAL for a NULL device or -ENOENT when the device has no
> + * private data.
> + */
> +static int nlb_afu_config(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
> +	struct nlb_csr_cfg v;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv)
> +		return -ENOENT;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	cfg = &priv->nlb_cfg;
> +
> +	v.csr = 0;
> +
> +	if (cfg->cont)
> +		v.cont = 1;
> +
> +	if (cfg->cache_policy == NLB_WRPUSH_I)
> +		v.wrpush_i = 1;
> +	else
> +		v.wrthru_en = cfg->cache_policy;
> +
> +	if (cfg->cache_hint == NLB_RDLINE_MIXED)
> +		v.rdsel = 3;
> +	else
> +		v.rdsel = cfg->cache_hint;
> +
> +	v.mode = cfg->mode;
> +	v.chsel = cfg->read_vc;
> +	v.wr_chsel = cfg->write_vc;
> +	v.wrfence_chsel = cfg->wrfence_vc;
> +	/*
> +	 * wrthru_en is not assigned again here: doing so unconditionally
> +	 * would override the NLB_WRPUSH_I case handled above.
> +	 */
> +	v.multicl_len = cfg->multi_cl - 1;
> +
> +	AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr);
> +	rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
> +
> +	return 0;
> +}
> +
> +/*
> + * Print the result of one NLB run for a transfer of cl cache lines, using
> + * the counters in the DSM status block.
> + *
> + * When no AFU clock frequency is configured, 200 MHz is assumed and
> + * stored in the configuration. Does nothing if dev or its private data
> + * is NULL.
> + */
> +static void nlb_afu_report(struct afu_mf_rawdev *dev, uint32_t cl)
> +{
> +	struct n3000_afu_priv *priv;
> +	struct rte_pmd_afu_nlb_cfg *cfg;
> +	struct nlb_dsm_status *dsm;
> +	uint64_t clocks;
> +	double lines, rd_bw, wr_bw;
> +
> +	if (dev == NULL || dev->priv == NULL)
> +		return;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	cfg = &priv->nlb_cfg;
> +	dsm = priv->nlb_ctx.status_ptr;
> +
> +	/* the end overhead is only subtracted for non-continuous runs */
> +	clocks = cfg->cont ?
> +		dsm->num_clocks - dsm->start_overhead :
> +		dsm->num_clocks - (dsm->start_overhead + dsm->end_overhead);
> +
> +	if (cfg->freq_mhz == 0)
> +		cfg->freq_mhz = 200;
> +
> +	lines = (double)dsm->num_reads;
> +	rd_bw = (lines * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / clocks;
> +	lines = (double)dsm->num_writes;
> +	wr_bw = (lines * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / clocks;
> +
> +	printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
> +		"Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
> +	printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n",
> +		cl, dsm->num_reads, dsm->num_writes, clocks,
> +		rd_bw / 1e9, wr_bw / 1e9);
> +}
> +
> +/*
> + * Run the NLB test for every cache line count from cfg->begin to cfg->end
> + * (step cfg->multi_cl), printing a report and checking the destination
> + * buffer after each run.
> + *
> + * Returns -EINVAL if dev is NULL, -ENOENT if dev->priv is NULL, the error
> + * from nlb_afu_config() or dsm_poll_timeout() if one of them fails, and 0
> + * otherwise.
> + */
> +static int nlb_afu_test(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct nlb_afu_ctx *ctx = NULL;
> +	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
> +	struct nlb_csr_ctl ctl;
> +	uint32_t *ptr = NULL;
> +	uint32_t i, j, cl, val = 0;
> +	uint64_t sval = 0;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv)
> +		return -ENOENT;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	ctx = &priv->nlb_ctx;
> +	cfg = &priv->nlb_cfg;
> +
> +	/* initialize registers */
> +	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
> +	rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
> +
> +	/* write the control register as 0, then with the reset field set */
> +	ctl.csr = 0;
> +	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +	ctl.reset = 1;
> +	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +	/* buffer addresses are passed through SIZE_TO_CLS() before being
> +	 * written - presumably the registers take cache line units; confirm
> +	 */
> +	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
> +	rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr +
> CSR_SRC_ADDR);
> +	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
> +	rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr +
> CSR_DST_ADDR);
> +
> +	ret = nlb_afu_config(dev);
> +	if (ret)
> +		return ret;
> +
> +	/* initialize src data */
> +	/* each 32-bit word holds its own index; the check below relies on it */
> +	ptr = (uint32_t *)ctx->src_ptr;
> +	j = CLS_TO_SIZE(cfg->end) >> 2;
> +	for (i = 0; i < j; i++)
> +		*ptr++ = i;
> +
> +	/* start test */
> +	/* NOTE(review): the loop never ends if cfg->multi_cl is 0; confirm
> +	 * nlb_afu_config() rejects that value
> +	 */
> +	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
> +		memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
> +		memset(ctx->dsm_ptr, 0, DSM_SIZE);
> +
> +		ctl.csr = 0;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +		ctl.reset = 1;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +		rte_write32(cl, ctx->addr + CSR_NUM_LINES);
> +
> +		rte_delay_us(10);
> +
> +		ctl.start = 1;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +		if (cfg->cont) {
> +			/* continuous mode: let the test run for the
> +			 * configured time, force completion, then wait for
> +			 * the completion flag
> +			 */
> +			rte_delay_ms(cfg->timeout * 1000);
> +			ctl.force_completion = 1;
> +			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +			ret = dsm_poll_timeout(&ctx->status_ptr-
> >test_complete,
> +				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
> +				DSM_TIMEOUT);
> +			if (ret) {
> +				printf("DSM poll timeout\n");
> +				goto end;
> +			}
> +		} else {
> +			/* one-shot mode: wait for the completion flag first,
> +			 * then write force_completion
> +			 */
> +			ret = dsm_poll_timeout(&ctx->status_ptr-
> >test_complete,
> +				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
> +				DSM_TIMEOUT);
> +			if (ret) {
> +				printf("DSM poll timeout\n");
> +				goto end;
> +			}
> +			ctl.force_completion = 1;
> +			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +		}
> +
> +		nlb_afu_report(dev, cl);
> +
> +		/* poll CSR_STATUS1 until it reads 0, at most 100 times with
> +		 * a 1000 us delay; a non-zero final value is not reported
> +		 */
> +		i = 0;
> +		while (i++ < 100) {
> +			sval = rte_read64(ctx->addr + CSR_STATUS1);
> +			if (sval == 0)
> +				break;
> +			rte_delay_us(1000);
> +		}
> +
> +		/* NOTE(review): a mismatch is only logged; ret is left
> +		 * unchanged, so the function can still return 0
> +		 */
> +		ptr = (uint32_t *)ctx->dest_ptr;
> +		j = CLS_TO_SIZE(cl) >> 2;
> +		for (i = 0; i < j; i++) {
> +			if (*ptr++ != i) {
> +				AFU_MF_PMD_ERR("Data mismatch @ %u",
> i);
> +				break;
> +			}
> +		}
> +	}
> +
> +end:
> +	return ret;
> +}
> +
> +/*
> + * Free all buffers held by a DMA test context and reset the pointers to
> + * NULL so the function can safely be called again. A NULL ctx is ignored.
> + */
> +static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
> +{
> +	int slot = 0;
> +
> +	if (ctx == NULL)
> +		return;
> +
> +	while (slot < NUM_DMA_BUF) {
> +		rte_free(ctx->dma_buf[slot]);
> +		ctx->dma_buf[slot] = NULL;
> +		slot++;
> +	}
> +
> +	rte_free(ctx->data_buf);
> +	ctx->data_buf = NULL;
> +
> +	rte_free(ctx->ref_buf);
> +	ctx->ref_buf = NULL;
> +}
> +
> +/*
> + * Allocate the buffers used by the DMA test: NUM_DMA_BUF zeroed bounce
> + * buffers of cfg->size bytes (with their IOVA) plus the data and reference
> + * buffers of cfg->length bytes, both aligned to the system page size.
> + *
> + * Returns 0 on success, -EINVAL on a NULL argument and -ENOMEM when an
> + * allocation or IOVA lookup fails; in the last case every buffer of the
> + * context is released again.
> + */
> +static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
> +	struct rte_pmd_afu_dma_cfg *cfg)
> +{
> +	size_t page_sz = sysconf(_SC_PAGE_SIZE);
> +	int n;
> +
> +	if (ctx == NULL || cfg == NULL)
> +		return -EINVAL;
> +
> +	for (n = 0; n < NUM_DMA_BUF; n++) {
> +		ctx->dma_buf[n] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
> +			TEST_MEM_ALIGN);
> +		if (ctx->dma_buf[n] == NULL)
> +			goto nomem;
> +
> +		ctx->dma_iova[n] = rte_malloc_virt2iova(ctx->dma_buf[n]);
> +		if (ctx->dma_iova[n] == RTE_BAD_IOVA)
> +			goto nomem;
> +	}
> +
> +	ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz);
> +	if (ctx->data_buf == NULL)
> +		goto nomem;
> +
> +	ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz);
> +	if (ctx->ref_buf == NULL)
> +		goto nomem;
> +
> +	return 0;
> +
> +nomem:
> +	dma_afu_buf_free(ctx);
> +	return -ENOMEM;
> +}
> +
> +/*
> + * Fill the reference buffer with test data and copy it to the data buffer.
> + *
> + * A non-zero ctx->pattern is used as the memset() fill value for 'size'
> + * bytes. Otherwise the buffer is filled with size / 4 values taken from
> + * rand() after seeding with the fixed value 99.
> + * Does nothing when ctx is NULL or size is 0.
> + */
> +static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
> +{
> +	int *word = NULL;
> +	size_t idx, nwords;
> +
> +	if (ctx == NULL || size == 0)
> +		return;
> +
> +	word = (int *)ctx->ref_buf;
> +
> +	if (ctx->pattern == 0) {
> +		srand(99);
> +		nwords = size >> 2;
> +		for (idx = 0; idx < nwords; idx++)
> +			word[idx] = rand();
> +	} else {
> +		memset(word, ctx->pattern, size);
> +	}
> +
> +	rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
> +}
> +
> +/*
> + * Compare the first 'size' bytes of the data buffer against the reference
> + * buffer and print the result.
> + *
> + * Returns 0 when the buffers match, -1 when they differ and -EINVAL when
> + * ctx is NULL or size is 0. With ctx->verbose set, mismatching bytes are
> + * listed until ERR_CHECK_LIMIT is reached.
> + */
> +static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
> +{
> +	uint8_t *src = NULL;
> +	uint8_t *dst = NULL;
> +	size_t i = 0;
> +	int n = 0;
> +
> +	if (!ctx || !size)
> +		return -EINVAL;
> +
> +	src = (uint8_t *)ctx->ref_buf;
> +	dst = (uint8_t *)ctx->data_buf;
> +
> +	if (memcmp(src, dst, size)) {
> +		printf("Transfer is corrupted\n");
> +		if (ctx->verbose) {
> +			for (i = 0; i < size; i++) {
> +				if (*src != *dst) {
> +					/* the mismatch that reaches the limit
> +					 * is counted but not printed
> +					 */
> +					if (++n >= ERR_CHECK_LIMIT)
> +						break;
> +					printf("Mismatch at 0x%zx, "
> +						"Expected %02x
> Actual %02x\n",
> +						i, *src, *dst);
> +				}
> +				src++;
> +				dst++;
> +			}
> +			if (n < ERR_CHECK_LIMIT) {
> +				printf("Found %d error bytes\n", n);
> +			} else {
> +				/* the scan stopped early, so n is only a
> +				 * lower bound
> +				 */
> +				printf("......\n");
> +				printf("Found more than %d error bytes\n",
> n);
> +			}
> +		}
> +		return -1;
> +	}
> +
> +	printf("Transfer is verified\n");
> +	return 0;
> +}
> +
> +/*
> + * Copy 'bytes' bytes from host memory to the device using 64-bit writes.
> + * Nothing is written unless both dev_addr and bytes pass
> + * IS_ALIGNED_QWORD().
> + */
> +static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr,
> +	uint64_t bytes)
> +{
> +	uint64_t total = bytes / sizeof(uint64_t);
> +	uint64_t n;
> +
> +	if (!(IS_ALIGNED_QWORD((uint64_t)dev_addr) &&
> +		IS_ALIGNED_QWORD((uint64_t)bytes)))
> +		return;
> +
> +	for (n = 0; n < total; n++)
> +		rte_write64(host_addr[n], &dev_addr[n]);
> +}
> +
> +/*
> + * Copy 'bytes' bytes from the device to host memory using 64-bit reads.
> + * Nothing is read unless both dev_addr and bytes pass IS_ALIGNED_QWORD().
> + */
> +static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr,
> +	uint64_t bytes)
> +{
> +	uint64_t total = bytes / sizeof(uint64_t);
> +	uint64_t n;
> +
> +	if (!(IS_ALIGNED_QWORD((uint64_t)dev_addr) &&
> +		IS_ALIGNED_QWORD((uint64_t)bytes)))
> +		return;
> +
> +	for (n = 0; n < total; n++)
> +		host_addr[n] = rte_read64(&dev_addr[n]);
> +}
> +
> +/*
> + * Select the ASE page that contains 'addr'. The page (addr with the
> + * in-window bits masked off) is written to the ASE control register only
> + * when it differs from the page cached in ctx->cur_ase_page.
> + */
> +static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
> +{
> +	uint64_t page = addr & ~DMA_ASE_WINDOW_MASK;
> +
> +	if (ctx == NULL)
> +		return;
> +
> +	if (page == ctx->cur_ase_page)
> +		return;
> +
> +	rte_write64(page, ctx->ase_ctrl_addr);
> +	ctx->cur_ase_page = page;
> +}
> +
> +/*
> + * Write fewer than QWORD_BYTES bytes to the device with a
> + * read-modify-write of the 64-bit word that contains dev_addr.
> + *
> + * Returns 0 on success (including count == 0) and -EINVAL when ctx is NULL
> + * or count is QWORD_BYTES or more.
> + */
> +static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
> +	uint64_t host_addr, uint32_t count)
> +{
> +	uintptr_t host = (uintptr_t)host_addr;  /* transfer to pointer size */
> +	uint64_t byte_off = 0;
> +	uint64_t win_off = 0;
> +	uint64_t qword = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr,
> +		dev_addr, count);
> +
> +	if (ctx == NULL || count >= QWORD_BYTES)
> +		return -EINVAL;
> +
> +	if (count == 0)
> +		return 0;
> +
> +	switch_ase_page(ctx, dev_addr);
> +
> +	/* locate the containing 64-bit word inside the ASE window */
> +	byte_off = dev_addr % QWORD_BYTES;
> +	win_off = (dev_addr - byte_off) & DMA_ASE_WINDOW_MASK;
> +
> +	/* patch the new bytes into the current device value and write it
> +	 * back
> +	 */
> +	qword = rte_read64(ctx->ase_data_addr + win_off);
> +	rte_memcpy((char *)&qword + byte_off, (void *)host, count);
> +	rte_write64(qword, ctx->ase_data_addr + win_off);
> +
> +	return 0;
> +}
> +
> +/*
> + * Copy DWORD/QWORD sized pieces from host memory to the device through
> + * the ASE window.
> + *
> + * dst_ptr: in/out device address, must be DWORD aligned
> + * src_ptr: in/out host address
> + * count:   in/out number of bytes still to transfer
> + *
> + * One DWORD is written first if needed to reach QWORD alignment, then
> + * QWORD blocks are written window by window, then at most one trailing
> + * DWORD. On return the three in/out values describe what is left for the
> + * caller; they are untouched when fewer than DWORD_BYTES were requested.
> + *
> + * Returns 0 on success, -EINVAL when ctx is NULL or dst is not DWORD
> + * aligned.
> + */
> +static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
> +	uint64_t *src_ptr, uint64_t *count)
> +{
> +	uint64_t src = *src_ptr;
> +	uint64_t dst = *dst_ptr;
> +	uint64_t align_bytes = *count;
> +	uint64_t offset = 0;
> +	uint64_t left_in_page = 0;
> +	uint64_t size_to_copy = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")",
> +		src, dst, align_bytes);
> +
> +	if (!ctx || !IS_ALIGNED_DWORD(dst))
> +		return -EINVAL;
> +
> +	if (align_bytes < DWORD_BYTES)
> +		return 0;
> +
> +	if (!IS_ALIGNED_QWORD(dst)) {
> +		/* Write out a single DWORD to get QWORD aligned */
> +		switch_ase_page(ctx, dst);
> +		offset = dst & DMA_ASE_WINDOW_MASK;
> +
> +		rte_write32(*(uint32_t *)(uintptr_t)src,
> +			ctx->ase_data_addr + offset);
> +		src += DWORD_BYTES;
> +		dst += DWORD_BYTES;
> +		align_bytes -= DWORD_BYTES;
> +	}
> +
> +	/* Write out blocks of 64-bit values */
> +	while (align_bytes >= QWORD_BYTES) {
> +		offset = dst & DMA_ASE_WINDOW_MASK;
> +		/* room left in the window holding dst; recomputed on every
> +		 * pass so a transfer can continue into the next window
> +		 */
> +		left_in_page = DMA_ASE_WINDOW - offset;
> +		size_to_copy =
> +			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
> +		if (size_to_copy < QWORD_BYTES)
> +			break;
> +		switch_ase_page(ctx, dst);
> +		blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
> +			(uint64_t *)(uintptr_t)src, size_to_copy);
> +		src += size_to_copy;
> +		dst += size_to_copy;
> +		align_bytes -= size_to_copy;
> +	}
> +
> +	if (align_bytes >= DWORD_BYTES) {
> +		/* Write out remaining DWORD */
> +		switch_ase_page(ctx, dst);
> +		offset = dst & DMA_ASE_WINDOW_MASK;
> +		rte_write32(*(uint32_t *)(uintptr_t)src,
> +			ctx->ase_data_addr + offset);
> +		src += DWORD_BYTES;
> +		dst += DWORD_BYTES;
> +		align_bytes -= DWORD_BYTES;
> +	}
> +
> +	/* always report progress, even if only the aligning DWORD was sent */
> +	*src_ptr = src;
> +	*dst_ptr = dst;
> +	*count = align_bytes;
> +
> +	return 0;
> +}
> +
> +/*
> + * Copy 'count' bytes from host memory to the device through the ASE
> + * window, in three steps: an unaligned head written with
> + * ase_write_unaligned(), the DWORD/QWORD body written with ase_write(),
> + * and an unaligned tail written with ase_write_unaligned().
> + *
> + * On success *dst_ptr and *src_ptr are advanced past the data written.
> + * Returns 0 on success or the error of the failing helper.
> + */
> +static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
> +	uint64_t *src_ptr, uint64_t count)
> +{
> +	uint64_t dst = *dst_ptr;
> +	uint64_t src = *src_ptr;
> +	uint64_t count_left = count;
> +	uint64_t unaligned_size = 0;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64"
> (0x%"PRIx64")", src, dst,
> +		count);
> +
> +	/* aligns address to 8 byte using dst masking method */
> +	if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) {
> +		/* bytes up to the next QWORD boundary, capped at count_left */
> +		unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
> +		if (unaligned_size > count_left)
> +			unaligned_size = count_left;
> +		ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
> +		if (ret)
> +			return ret;
> +		count_left -= unaligned_size;
> +		src += unaligned_size;
> +		dst += unaligned_size;
> +	}
> +
> +	/* Handles 8/4 byte MMIO transfer */
> +	ret = ase_write(ctx, &dst, &src, &count_left);
> +	if (ret)
> +		return ret;
> +
> +	/* Left over unaligned bytes transferred using dst masking method
> */
> +	unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
> +	if (unaligned_size > count_left)
> +		unaligned_size = count_left;
> +
> +	/* a zero unaligned_size makes this call a no-op that returns 0 */
> +	ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
> +	if (ret)
> +		return ret;
> +
> +	count_left -= unaligned_size;
> +	*dst_ptr = dst + unaligned_size;
> +	*src_ptr = src + unaligned_size;
> +
> +	return 0;
> +}
> +
> +/*
> + * Read fewer than QWORD_BYTES bytes from the device by fetching the
> + * 64-bit word that contains dev_addr and copying the wanted bytes out.
> + *
> + * Returns 0 on success (including count == 0) and -EINVAL when ctx is NULL
> + * or count is QWORD_BYTES or more.
> + */
> +static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
> +	uint64_t host_addr, uint32_t count)
> +{
> +	uintptr_t host = (uintptr_t)host_addr;  /* transfer to pointer size */
> +	uint64_t byte_off = 0;
> +	uint64_t win_off = 0;
> +	uint64_t qword = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr,
> +		dev_addr, count);
> +
> +	if (ctx == NULL || count >= QWORD_BYTES)
> +		return -EINVAL;
> +
> +	if (count == 0)
> +		return 0;
> +
> +	switch_ase_page(ctx, dev_addr);
> +
> +	/* locate the containing 64-bit word inside the ASE window */
> +	byte_off = dev_addr % QWORD_BYTES;
> +	win_off = (dev_addr - byte_off) & DMA_ASE_WINDOW_MASK;
> +
> +	qword = rte_read64(ctx->ase_data_addr + win_off);
> +	rte_memcpy((void *)host, (char *)&qword + byte_off, count);
> +
> +	return 0;
> +}
> +
> +/*
> + * Copy DWORD/QWORD sized pieces from the device to host memory through
> + * the ASE window.
> + *
> + * src_ptr: in/out device address, must be DWORD aligned
> + * dst_ptr: in/out host address
> + * count:   in/out number of bytes still to transfer
> + *
> + * One DWORD is read first if needed to reach QWORD alignment, then QWORD
> + * blocks are read window by window, then at most one trailing DWORD. On
> + * return the three in/out values describe what is left for the caller;
> + * they are untouched when fewer than DWORD_BYTES were requested.
> + *
> + * Returns 0 on success, -EINVAL when ctx is NULL or src is not DWORD
> + * aligned.
> + */
> +static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
> +	uint64_t *dst_ptr, uint64_t *count)
> +{
> +	uint64_t src = *src_ptr;
> +	uint64_t dst = *dst_ptr;
> +	uint64_t align_bytes = *count;
> +	uint64_t offset = 0;
> +	uint64_t left_in_page = 0;
> +	uint64_t size_to_copy = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")",
> +		dst, src, align_bytes);
> +
> +	if (!ctx || !IS_ALIGNED_DWORD(src))
> +		return -EINVAL;
> +
> +	if (align_bytes < DWORD_BYTES)
> +		return 0;
> +
> +	if (!IS_ALIGNED_QWORD(src)) {
> +		/* Read a single DWORD to get QWORD aligned */
> +		switch_ase_page(ctx, src);
> +		offset = src & DMA_ASE_WINDOW_MASK;
> +		*(uint32_t *)(uintptr_t)dst =
> +			rte_read32(ctx->ase_data_addr + offset);
> +		src += DWORD_BYTES;
> +		dst += DWORD_BYTES;
> +		align_bytes -= DWORD_BYTES;
> +	}
> +
> +	/* Read blocks of 64-bit values */
> +	while (align_bytes >= QWORD_BYTES) {
> +		offset = src & DMA_ASE_WINDOW_MASK;
> +		/* room left in the window holding src; recomputed on every
> +		 * pass so a transfer can continue into the next window
> +		 */
> +		left_in_page = DMA_ASE_WINDOW - offset;
> +		size_to_copy =
> +			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
> +		if (size_to_copy < QWORD_BYTES)
> +			break;
> +		switch_ase_page(ctx, src);
> +		blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
> +			(uint64_t *)(uintptr_t)dst, size_to_copy);
> +		src += size_to_copy;
> +		dst += size_to_copy;
> +		align_bytes -= size_to_copy;
> +	}
> +
> +	if (align_bytes >= DWORD_BYTES) {
> +		/* Read remaining DWORD */
> +		switch_ase_page(ctx, src);
> +		offset = src & DMA_ASE_WINDOW_MASK;
> +		*(uint32_t *)(uintptr_t)dst =
> +			rte_read32(ctx->ase_data_addr + offset);
> +		src += DWORD_BYTES;
> +		dst += DWORD_BYTES;
> +		align_bytes -= DWORD_BYTES;
> +	}
> +
> +	/* always report progress, even if only the aligning DWORD was read */
> +	*src_ptr = src;
> +	*dst_ptr = dst;
> +	*count = align_bytes;
> +
> +	return 0;
> +}
> +
> +/*
> + * Copy 'count' bytes from the device to host memory through the ASE
> + * window, in three steps: an unaligned head read with
> + * ase_read_unaligned(), the DWORD/QWORD body read with ase_read(), and an
> + * unaligned tail read with ase_read_unaligned().
> + *
> + * On success *src_ptr and *dst_ptr are advanced past the data read.
> + * Returns 0 on success or the error of the failing helper.
> + */
> +static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
> +	uint64_t *dst_ptr, uint64_t count)
> +{
> +	uint64_t src = *src_ptr;
> +	uint64_t dst = *dst_ptr;
> +	uint64_t count_left = count;
> +	uint64_t unaligned_size = 0;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64"
> (0x%"PRIx64")", src, dst,
> +		count);
> +
> +	/* Aligns address to 8 byte using src masking method */
> +	if (!IS_ALIGNED_DWORD(src) && !IS_ALIGNED_QWORD(src)) {
> +		/* bytes up to the next QWORD boundary, capped at count_left */
> +		unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
> +		if (unaligned_size > count_left)
> +			unaligned_size = count_left;
> +		ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
> +		if (ret)
> +			return ret;
> +		count_left -= unaligned_size;
> +		dst += unaligned_size;
> +		src += unaligned_size;
> +	}
> +
> +	/* Handles 8/4 byte MMIO transfer */
> +	ret = ase_read(ctx, &src, &dst, &count_left);
> +	if (ret)
> +		return ret;
> +
> +	/* Left over unaligned bytes transferred using src masking method */
> +	unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
> +	if (unaligned_size > count_left)
> +		unaligned_size = count_left;
> +
> +	/* a zero unaligned_size makes this call a no-op that returns 0 */
> +	ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
> +	if (ret)
> +		return ret;
> +
> +	count_left -= unaligned_size;
> +	*dst_ptr = dst + unaligned_size;
> +	*src_ptr = src + unaligned_size;
> +
> +	return 0;
> +}
> +
> +/*
> + * Acknowledge a DMA interrupt: the IRQ bit of the status register is
> + * cleared by writing 1 to it. A NULL ctx is ignored.
> + */
> +static void clear_interrupt(struct dma_afu_ctx *ctx)
> +{
> +	msgdma_status ack;
> +
> +	if (ctx == NULL)
> +		return;
> +
> +	/* build a status word with only the IRQ bit set */
> +	ack.csr = 0;
> +	ack.irq = 1;
> +
> +	rte_write32(ack.csr, CSR_STATUS(ctx->csr_addr));
> +}
> +
> +/*
> + * Wait up to DMA_TIMEOUT_MSEC for the DMA interrupt event fd to become
> + * readable, consume the event counter and acknowledge the interrupt.
> + *
> + * The interrupt is acknowledged on every path that gets past the argument
> + * check, including poll() failure and timeout.
> + *
> + * Returns 0 on success, -EINVAL when ctx is NULL or has no event fd,
> + * -EFAULT when poll() fails, -ETIMEDOUT on timeout and -EIO when the
> + * event counter cannot be read.
> + */
> +static int poll_interrupt(struct dma_afu_ctx *ctx)
> +{
> +	struct pollfd pfd = {0};
> +	uint64_t count = 0;
> +	ssize_t bytes_read = 0;
> +	int poll_ret = 0;
> +	int ret = 0;
> +
> +	if (!ctx || (ctx->event_fd < 0))
> +		return -EINVAL;
> +
> +	pfd.fd = ctx->event_fd;
> +	pfd.events = POLLIN;
> +	poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
> +	if (poll_ret < 0) {
> +		AFU_MF_PMD_ERR("Error %s", strerror(errno));
> +		ret = -EFAULT;
> +	} else if (poll_ret == 0) {
> +		AFU_MF_PMD_ERR("Timeout");
> +		ret = -ETIMEDOUT;
> +	} else {
> +		bytes_read = read(pfd.fd, &count, sizeof(count));
> +		if (bytes_read > 0) {
> +			if (ctx->verbose)
> +				AFU_MF_PMD_DEBUG("Successful, ret %d, cnt %"PRIu64,
> +					poll_ret, count);
> +			ret = 0;
> +		} else {
> +			/* read() sets errno only when it returns -1; a
> +			 * return of 0 carries no errno
> +			 */
> +			AFU_MF_PMD_ERR("Failed %s", bytes_read < 0 ?
> +				strerror(errno) : "zero bytes read");
> +			ret = -EIO;
> +		}
> +	}
> +
> +	clear_interrupt(ctx);
> +	return ret;
> +}
> +
> +/*
> + * Submit one extended descriptor to the DMA engine.
> + *
> + * Spins until the status register no longer reports desc_buf_full, then
> + * copies the descriptor to ctx->desc_addr with 64-bit writes. With
> + * ctx->verbose set the descriptor fields are logged first.
> + * NOTE(review): desc is dereferenced without a NULL check, and the wait
> + * for a free descriptor slot has no upper bound.
> + */
> +static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc
> *desc)
> +{
> +	msgdma_status status;
> +	uint64_t fpga_queue_full = 0;
> +
> +	if (!ctx)
> +		return;
> +
> +	if (ctx->verbose) {
> +		AFU_MF_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
> +			desc->rd_address_ext, desc->rd_address);
> +		AFU_MF_PMD_DEBUG("descriptor.wr_address =
> 0x%x%08x",
> +			desc->wr_address_ext, desc->wr_address);
> +		AFU_MF_PMD_DEBUG("descriptor.len = %u", desc->len);
> +		AFU_MF_PMD_DEBUG("descriptor.wr_burst_count = %u",
> +			desc->wr_burst_count);
> +		AFU_MF_PMD_DEBUG("descriptor.rd_burst_count = %u",
> +			desc->rd_burst_count);
> +		AFU_MF_PMD_DEBUG("descriptor.wr_stride %u", desc-
> >wr_stride);
> +		AFU_MF_PMD_DEBUG("descriptor.rd_stride %u", desc-
> >rd_stride);
> +	}
> +
> +	/* busy-wait for a free descriptor slot; the counter only rate-limits
> +	 * the debug message, it does not end the wait
> +	 */
> +	do {
> +		status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
> +		if (fpga_queue_full++ > 100000000) {
> +			AFU_MF_PMD_DEBUG("DMA queue full retry");
> +			fpga_queue_full = 0;
> +		}
> +	} while (status.desc_buf_full);
> +
> +	blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
> +		sizeof(*desc));
> +}
> +
> +/*
> + * Build and submit the descriptor(s) for one DMA transfer.
> + *
> + * ctx:          DMA context; its desc_buf is reused for every descriptor
> + * dst, src:     destination and source addresses
> + * count:        transfer size in bytes
> + * is_last_desc: when 0, early_done_en is set in the descriptor
> + * type:         transfer direction
> + * intr_en:      when non-zero, request a transfer interrupt
> + *
> + * FPGA_TO_FPGA transfers use a single descriptor. Other directions are
> + * split into up to three descriptors around CCIP_ALIGN_BYTES boundaries
> + * of the host address: a short head, an aligned body and a short tail.
> + *
> + * Returns 0 on success, -EINVAL when ctx is NULL or dst, src or count
> + * fail IS_DMA_ALIGNED().
> + */
> +static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
> +	int count, int is_last_desc, fpga_dma_type type, int intr_en)
> +{
> +	msgdma_ext_desc *desc = NULL;
> +	int alignment_offset = 0;
> +	int segment_size = 0;
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	/* src, dst and count must be 64-byte aligned */
> +	if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
> +		!IS_DMA_ALIGNED(count))
> +		return -EINVAL;
> +	memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
> +
> +	/* these fields are fixed for all DMA transfers */
> +	desc = ctx->desc_buf;
> +	desc->seq_num = 0;
> +	desc->wr_stride = 1;
> +	desc->rd_stride = 1;
> +	desc->control.go = 1;
> +	if (intr_en)
> +		desc->control.transfer_irq_en = 1;
> +	else
> +		desc->control.transfer_irq_en = 0;
> +
> +	if (!is_last_desc)
> +		desc->control.early_done_en = 1;
> +	else
> +		desc->control.early_done_en = 0;
> +
> +	if (type == FPGA_TO_FPGA) {
> +		/* the 64-bit addresses are split into low and high words */
> +		desc->rd_address = src & DMA_MASK_32_BIT;
> +		desc->wr_address = dst & DMA_MASK_32_BIT;
> +		desc->len = count;
> +		desc->wr_burst_count = 4;
> +		desc->rd_burst_count = 4;
> +		desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
> +		desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
> +		send_descriptor(ctx, desc);
> +	} else {
> +		/* check CCIP (host) address is aligned to 4CL (256B) */
> +		alignment_offset = (type == HOST_TO_FPGA)
> +			? (src % CCIP_ALIGN_BYTES) : (dst %
> CCIP_ALIGN_BYTES);
> +		/* performing a short transfer to get aligned */
> +		if (alignment_offset != 0) {
> +			desc->rd_address = src & DMA_MASK_32_BIT;
> +			desc->wr_address = dst & DMA_MASK_32_BIT;
> +			desc->wr_burst_count = 1;
> +			desc->rd_burst_count = 1;
> +			desc->rd_address_ext = (src >> 32) &
> DMA_MASK_32_BIT;
> +			desc->wr_address_ext = (dst >> 32) &
> DMA_MASK_32_BIT;
> +			/* count isn't large enough to hit next 4CL boundary
> */
> +			if ((CCIP_ALIGN_BYTES - alignment_offset) >= count)
> {
> +				segment_size = count;
> +				count = 0;
> +			} else {
> +				segment_size = CCIP_ALIGN_BYTES
> +					- alignment_offset;
> +				src += segment_size;
> +				dst += segment_size;
> +				count -= segment_size;
> +				/* more descriptors follow; do not interrupt
> +				 * on this one
> +				 */
> +				desc->control.transfer_irq_en = 0;
> +			}
> +			/* post short transfer to align to a 4CL (256 byte) */
> +			desc->len = segment_size;
> +			send_descriptor(ctx, desc);
> +		}
> +		/* at this point we are 4CL (256 byte) aligned */
> +		if (count >= CCIP_ALIGN_BYTES) {
> +			desc->rd_address = src & DMA_MASK_32_BIT;
> +			desc->wr_address = dst & DMA_MASK_32_BIT;
> +			desc->wr_burst_count = 4;
> +			desc->rd_burst_count = 4;
> +			desc->rd_address_ext = (src >> 32) &
> DMA_MASK_32_BIT;
> +			desc->wr_address_ext = (dst >> 32) &
> DMA_MASK_32_BIT;
> +			/* buffer ends on 4CL boundary */
> +			if ((count % CCIP_ALIGN_BYTES) == 0) {
> +				segment_size = count;
> +				count = 0;
> +			} else {
> +				segment_size = count
> +					- (count % CCIP_ALIGN_BYTES);
> +				src += segment_size;
> +				dst += segment_size;
> +				count -= segment_size;
> +				/* a tail descriptor follows; do not
> +				 * interrupt on this one
> +				 */
> +				desc->control.transfer_irq_en = 0;
> +			}
> +			desc->len = segment_size;
> +			send_descriptor(ctx, desc);
> +		}
> +		/* post short transfer to handle the remainder */
> +		if (count > 0) {
> +			desc->rd_address = src & DMA_MASK_32_BIT;
> +			desc->wr_address = dst & DMA_MASK_32_BIT;
> +			desc->len = count;
> +			desc->wr_burst_count = 1;
> +			desc->rd_burst_count = 1;
> +			desc->rd_address_ext = (src >> 32) &
> DMA_MASK_32_BIT;
> +			desc->wr_address_ext = (dst >> 32) &
> DMA_MASK_32_BIT;
> +			/* restore the interrupt request that an earlier
> +			 * segment may have cleared
> +			 */
> +			if (intr_en)
> +				desc->control.transfer_irq_en = 1;
> +			send_descriptor(ctx, desc);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Clear the host magic word and queue a 64-byte FPGA-to-host DMA from
> + * DMA_WF_MAGIC_ROM into it, with the transfer interrupt enabled.
> + * wait_magic() later waits for the word to change to DMA_WF_MAGIC.
> + *
> + * Returns the result of do_dma(), or -EINVAL when ctx is NULL.
> + */
> +static int issue_magic(struct dma_afu_ctx *ctx)
> +{
> +	/* validate before touching magic_buf, as the sibling helpers do */
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	*(ctx->magic_buf) = 0ULL;
> +	return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
> +		DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
> +}
> +
> +/*
> + * Wait for the magic write queued by issue_magic(): wait for the DMA
> + * interrupt, then re-read the magic word until it equals DMA_WF_MAGIC,
> + * giving up with an error log after a bounded number of reads. The magic
> + * word is cleared before returning. A NULL ctx is ignored.
> + */
> +static void wait_magic(struct dma_afu_ctx *ctx)
> +{
> +	int spins;
> +
> +	if (ctx == NULL)
> +		return;
> +
> +	poll_interrupt(ctx);
> +
> +	for (spins = 0; *(ctx->magic_buf) != DMA_WF_MAGIC; spins++) {
> +		if (spins > 1000) {
> +			AFU_MF_PMD_ERR("DMA magic operation timeout");
> +			break;
> +		}
> +	}
> +
> +	*(ctx->magic_buf) = 0ULL;
> +}
> +
> +/*
> + * Send chunk number 'chunk' of a host-to-FPGA transfer.
> + *
> + * The chunk is copied into bounce buffer chunk % NUM_DMA_BUF and a DMA of
> + * ctx->dma_buf_size bytes is queued from there. An interrupt is requested
> + * on the last chunk of each HALF_DMA_BUF group and on the final chunk;
> + * before doing so, any interrupt requested earlier (*intr_issued) is
> + * waited for.
> + *
> + * Returns 0 on success, -EINVAL on a NULL argument, or the error from
> + * poll_interrupt() / do_dma().
> + */
> +static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
> +	uint64_t chunk, int is_last_chunk, int *intr_issued)
> +{
> +	uint64_t byte_off;
> +	uint64_t slot;
> +	int want_irq = 0;
> +	int ret = 0;
> +
> +	if (ctx == NULL || intr_issued == NULL)
> +		return -EINVAL;
> +
> +	byte_off = chunk * ctx->dma_buf_size;
> +	src += byte_off;
> +	dst += byte_off;
> +
> +	if (is_last_chunk ||
> +		((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1))) {
> +		if (*intr_issued) {
> +			ret = poll_interrupt(ctx);
> +			if (ret)
> +				return ret;
> +		}
> +		want_irq = 1;
> +	}
> +
> +	slot = chunk % NUM_DMA_BUF;
> +	rte_memcpy(ctx->dma_buf[slot], (void *)(uintptr_t)src,
> +		ctx->dma_buf_size);
> +	ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[slot]),
> +			ctx->dma_buf_size, 0, HOST_TO_FPGA, want_irq);
> +	if (want_irq)
> +		*intr_issued = 1;
> +
> +	return ret;
> +}
> +
> +/*
> + * Copy 'count' bytes from host memory (src) to FPGA memory (dst).
> + *
> + * The transfer is done in up to four stages:
> + *  1. if dst fails IS_DMA_ALIGNED(), the bytes up to the next
> + *     DMA_ALIGN_BYTES boundary (or everything, if count is smaller than
> + *     that) go through the ASE window;
> + *  2. whole chunks of ctx->dma_buf_size bytes go through dma_tx_buf();
> + *  3. the remaining multiple of DMA_ALIGN_BYTES goes through one DMA from
> + *     bounce buffer 0;
> + *  4. the final bytes go through the ASE window.
> + *
> + * Returns 0 on success, -EINVAL when ctx is NULL, or the error of the
> + * stage that failed.
> + */
> +static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst,
> uint64_t src,
> +	size_t count)
> +{
> +	uint64_t i = 0;
> +	uint64_t count_left = count;
> +	uint64_t aligned_addr = 0;
> +	uint64_t align_bytes = 0;
> +	uint64_t dma_chunks = 0;
> +	uint64_t dma_tx_bytes = 0;
> +	uint64_t offset = 0;
> +	int issued_intr = 0;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src,
> dst,
> +		count);
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	if (!IS_DMA_ALIGNED(dst)) {
> +		if (count_left < DMA_ALIGN_BYTES)
> +			return ase_host_to_fpga(ctx, &dst, &src, count_left);
> +
> +		/* round dst up to the next DMA_ALIGN_BYTES boundary;
> +		 * ase_host_to_fpga() advances dst and src itself
> +		 */
> +		aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
> +			* DMA_ALIGN_BYTES;
> +		align_bytes = aligned_addr - dst;
> +		ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
> +		if (ret)
> +			return ret;
> +		count_left = count_left - align_bytes;
> +	}
> +
> +	if (count_left) {
> +		/* offset is the number of bytes covered by whole chunks */
> +		dma_chunks = count_left / ctx->dma_buf_size;
> +		offset = dma_chunks * ctx->dma_buf_size;
> +		count_left -= offset;
> +		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
> +			" (%"PRIu64"...0x%"PRIx64")",
> +			src, dst, dma_chunks, count_left);
> +		for (i = 0; i < dma_chunks; i++) {
> +			ret = dma_tx_buf(ctx, dst, src, i,
> +				i == (dma_chunks - 1), &issued_intr);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		/* wait for the last interrupt requested by dma_tx_buf() */
> +		if (issued_intr) {
> +			ret = poll_interrupt(ctx);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		if (count_left) {
> +			/* largest multiple of DMA_ALIGN_BYTES still left */
> +			i = count_left / DMA_ALIGN_BYTES;
> +			if (i > 0) {
> +				dma_tx_bytes = i * DMA_ALIGN_BYTES;
> +				AFU_MF_PMD_DEBUG("left over
> 0x%"PRIx64" to DMA",
> +					dma_tx_bytes);
> +				rte_memcpy(ctx->dma_buf[0],
> +					(void *)(uintptr_t)(src + offset),
> +					dma_tx_bytes);
> +				ret = do_dma(ctx, dst + offset,
> +					DMA_HOST_ADDR(ctx-
> >dma_iova[0]),
> +					dma_tx_bytes, 1, HOST_TO_FPGA, 1);
> +				if (ret)
> +					return ret;
> +				ret = poll_interrupt(ctx);
> +				if (ret)
> +					return ret;
> +			}
> +
> +			/* dma_tx_bytes is still 0 if no DMA was done above */
> +			count_left -= dma_tx_bytes;
> +			if (count_left) {
> +				AFU_MF_PMD_DEBUG("left over
> 0x%"PRIx64" to ASE",
> +					count_left);
> +				dst += offset + dma_tx_bytes;
> +				src += offset + dma_tx_bytes;
> +				ret = ase_host_to_fpga(ctx, &dst, &src,
> +					count_left);
> +			}
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * Receive chunk number 'chunk' of an FPGA-to-host transfer.
> + *
> + * ctx:           DMA context
> + * dst, src:      host destination and FPGA source base addresses
> + * chunk:         index of the chunk to fetch
> + * is_last_chunk: non-zero for the final chunk of the transfer
> + * rx_count:      in/out number of chunks already copied out to dst
> + * wf_issued:     in/out flag telling whether a magic write is pending
> + *
> + * A DMA of ctx->dma_buf_size bytes is queued into bounce buffer
> + * chunk % NUM_DMA_BUF. Once HALF_DMA_BUF chunks are pending a magic write
> + * is issued. When more than NUM_DMA_BUF - 1 chunks are pending, or on the
> + * last chunk, a pending magic write is waited for, HALF_DMA_BUF bounce
> + * buffers are copied out to dst, and a new magic write is issued.
> + *
> + * Returns 0 on success, -EINVAL on a NULL argument, or the error from
> + * do_dma() / issue_magic().
> + */
> +static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
> +	uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
> +{
> +	uint64_t i = chunk % NUM_DMA_BUF;
> +	uint64_t n = 0;
> +	uint64_t num_pending = 0;
> +	int ret = 0;
> +
> +	/* check every pointer before the first dereference */
> +	if (!ctx || !rx_count || !wf_issued)
> +		return -EINVAL;
> +
> +	n = *rx_count;
> +
> +	ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
> +		src + chunk * ctx->dma_buf_size,
> +		ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
> +	if (ret)
> +		return ret;
> +
> +	/* chunks queued so far that have not been copied out to dst */
> +	num_pending = chunk - n + 1;
> +	if (num_pending == HALF_DMA_BUF) {
> +		ret = issue_magic(ctx);
> +		if (ret) {
> +			AFU_MF_PMD_DEBUG("Magic issue failed");
> +			return ret;
> +		}
> +		*wf_issued = 1;
> +	}
> +
> +	if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
> +		if (*wf_issued) {
> +			wait_magic(ctx);
> +			for (i = 0; i < HALF_DMA_BUF; i++) {
> +				rte_memcpy((void *)(uintptr_t)(dst +
> +						n * ctx->dma_buf_size),
> +					ctx->dma_buf[n % NUM_DMA_BUF],
> +					ctx->dma_buf_size);
> +				n++;
> +			}
> +			*wf_issued = 0;
> +			*rx_count = n;
> +		}
> +		ret = issue_magic(ctx);
> +		if (ret) {
> +			AFU_MF_PMD_DEBUG("Magic issue failed");
> +			return ret;
> +		}
> +		*wf_issued = 1;
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * Copy 'count' bytes from FPGA memory (src) to host memory (dst).
> + *
> + * The transfer is done in up to four stages:
> + *  1. if src fails IS_DMA_ALIGNED(), the bytes up to the next
> + *     DMA_ALIGN_BYTES boundary (or everything, if count is smaller than
> + *     that) go through the ASE window;
> + *  2. whole chunks of ctx->dma_buf_size bytes go through dma_rx_buf(),
> + *     followed by copying out the bounce buffers it left pending;
> + *  3. the remaining multiple of DMA_ALIGN_BYTES goes through one DMA into
> + *     bounce buffer 0;
> + *  4. the final bytes go through the ASE window.
> + *
> + * Returns 0 on success, -EINVAL when ctx is NULL, or the error of the
> + * stage that failed.
> + */
> +static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst,
> uint64_t src,
> +	size_t count)
> +{
> +	uint64_t i = 0;
> +	uint64_t count_left = count;
> +	uint64_t aligned_addr = 0;
> +	uint64_t align_bytes = 0;
> +	uint64_t dma_chunks = 0;
> +	uint64_t pending_buf = 0;
> +	uint64_t dma_rx_bytes = 0;
> +	uint64_t offset = 0;
> +	int wf_issued = 0;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src,
> dst,
> +		count);
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	if (!IS_DMA_ALIGNED(src)) {
> +		if (count_left < DMA_ALIGN_BYTES)
> +			return ase_fpga_to_host(ctx, &src, &dst, count_left);
> +
> +		/* round src up to the next DMA_ALIGN_BYTES boundary;
> +		 * ase_fpga_to_host() advances src and dst itself
> +		 */
> +		aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
> +			 * DMA_ALIGN_BYTES;
> +		align_bytes = aligned_addr - src;
> +		ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
> +		if (ret)
> +			return ret;
> +		count_left = count_left - align_bytes;
> +	}
> +
> +	if (count_left) {
> +		/* offset is the number of bytes covered by whole chunks */
> +		dma_chunks = count_left / ctx->dma_buf_size;
> +		offset = dma_chunks * ctx->dma_buf_size;
> +		count_left -= offset;
> +		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
> +			" (%"PRIu64"...0x%"PRIx64")",
> +			src, dst, dma_chunks, count_left);
> +		/* pending_buf tracks how many chunks dma_rx_buf() has
> +		 * already copied out to dst
> +		 */
> +		for (i = 0; i < dma_chunks; i++) {
> +			ret = dma_rx_buf(ctx, dst, src, i,
> +				i == (dma_chunks - 1),
> +				&pending_buf, &wf_issued);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		if (wf_issued)
> +			wait_magic(ctx);
> +
> +		/* clear out final dma memcpy operations */
> +		while (pending_buf < dma_chunks) {
> +			/* constant size transfer; no length check required */
> +			rte_memcpy((void *)(uintptr_t)(dst +
> +					pending_buf * ctx->dma_buf_size),
> +				ctx->dma_buf[pending_buf %
> NUM_DMA_BUF],
> +				ctx->dma_buf_size);
> +			pending_buf++;
> +		}
> +
> +		if (count_left > 0) {
> +			/* largest multiple of DMA_ALIGN_BYTES still left */
> +			i = count_left / DMA_ALIGN_BYTES;
> +			if (i > 0) {
> +				dma_rx_bytes = i * DMA_ALIGN_BYTES;
> +				AFU_MF_PMD_DEBUG("left over
> 0x%"PRIx64" to DMA",
> +					dma_rx_bytes);
> +				ret = do_dma(ctx,
> +					DMA_HOST_ADDR(ctx-
> >dma_iova[0]),
> +					src + offset,
> +					dma_rx_bytes, 1, FPGA_TO_HOST, 0);
> +				if (ret)
> +					return ret;
> +				/* the magic write is queued after the data
> +				 * DMA and waited for before copying out
> +				 */
> +				ret = issue_magic(ctx);
> +				if (ret)
> +					return ret;
> +				wait_magic(ctx);
> +				rte_memcpy((void *)(uintptr_t)(dst + offset),
> +					ctx->dma_buf[0], dma_rx_bytes);
> +			}
> +
> +			/* dma_rx_bytes is still 0 if no DMA was done above */
> +			count_left -= dma_rx_bytes;
> +			if (count_left) {
> +				AFU_MF_PMD_DEBUG("left over
> 0x%"PRIx64" to ASE",
> +					count_left);
> +				dst += offset + dma_rx_bytes;
> +				src += offset + dma_rx_bytes;
> +				ret = ase_fpga_to_host(ctx, &src, &dst,
> +							count_left);
> +			}
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * Copy 'count' bytes between two FPGA memory locations.
> + *
> + * When dst, src and count all pass IS_DMA_ALIGNED() the copy is done with
> + * FPGA_TO_FPGA descriptors in chunks of ctx->dma_buf_size, each group
> + * followed by a magic write that is waited for. Otherwise the data is
> + * bounced through a temporary host buffer with dma_fpga_to_host() and
> + * dma_host_to_fpga(); in that mode a source range that starts below dst
> + * and overlaps it is rejected.
> + *
> + * Returns 0 on success, -EINVAL when ctx is NULL or the ranges overlap,
> + * -ENOMEM when the bounce buffer cannot be allocated, or the error of the
> + * failing transfer.
> + */
> +static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst,
> +	uint64_t src, size_t count)
> +{
> +	uint64_t i = 0;
> +	uint64_t count_left = count;
> +	uint64_t dma_chunks = 0;
> +	uint64_t offset = 0;
> +	uint32_t tx_chunks = 0;
> +	uint64_t *tmp_buf = NULL;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
> +		count);
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
> +	    && IS_DMA_ALIGNED(count_left)) {
> +		dma_chunks = count_left / ctx->dma_buf_size;
> +		offset = dma_chunks * ctx->dma_buf_size;
> +		count_left -= offset;
> +		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
> +			" (%"PRIu64"...0x%"PRIx64")",
> +			src, dst, dma_chunks, count_left);
> +		for (i = 0; i < dma_chunks; i++) {
> +			ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
> +				src + i * ctx->dma_buf_size,
> +				ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
> +			if (ret)
> +				return ret;
> +			/* synchronize after every NUM_DMA_BUF chunks and
> +			 * after the last one
> +			 */
> +			if ((((i + 1) % NUM_DMA_BUF) == 0) ||
> +				(i == (dma_chunks - 1))) {
> +				ret = issue_magic(ctx);
> +				if (ret)
> +					return ret;
> +				wait_magic(ctx);
> +			}
> +		}
> +
> +		if (count_left > 0) {
> +			AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
> +				count_left);
> +			ret = do_dma(ctx, dst + offset, src + offset,
> +				count_left, 1, FPGA_TO_FPGA, 0);
> +			if (ret)
> +				return ret;
> +			ret = issue_magic(ctx);
> +			if (ret)
> +				return ret;
> +			wait_magic(ctx);
> +		}
> +	} else {
> +		if ((src < dst) && (src + count_left > dst)) {
> +			AFU_MF_PMD_ERR("Overlapping: 0x%"PRIx64
> +				" -> 0x%"PRIx64" (0x%"PRIx64")",
> +				src, dst, count_left);
> +			return -EINVAL;
> +		}
> +		tx_chunks = count_left / ctx->dma_buf_size;
> +		offset = tx_chunks * ctx->dma_buf_size;
> +		count_left -= offset;
> +		AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
> +			" (%u...0x%"PRIx64")",
> +			src, dst, tx_chunks, count_left);
> +		tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
> +			DMA_ALIGN_BYTES);
> +		/* the bounce buffer is the target of every copy below */
> +		if (!tmp_buf)
> +			return -ENOMEM;
> +		for (i = 0; i < tx_chunks; i++) {
> +			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
> +				src + i * ctx->dma_buf_size,
> +				ctx->dma_buf_size);
> +			if (ret)
> +				goto free_buf;
> +			ret = dma_host_to_fpga(ctx,
> +				dst + i * ctx->dma_buf_size,
> +				(uint64_t)tmp_buf, ctx->dma_buf_size);
> +			if (ret)
> +				goto free_buf;
> +		}
> +
> +		if (count_left > 0) {
> +			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
> +				src + offset, count_left);
> +			if (ret)
> +				goto free_buf;
> +			ret = dma_host_to_fpga(ctx, dst + offset,
> +				(uint64_t)tmp_buf, count_left);
> +			if (ret)
> +				goto free_buf;
> +		}
> +free_buf:
> +		rte_free(tmp_buf);
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * Perform one blocking DMA transfer of 'count' bytes in the direction
> + * given by 'type'.
> + *
> + * Returns the result of the direction specific helper, or -EINVAL when
> + * ctx is NULL or the direction is not one of the three supported ones.
> + */
> +static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
> +	uint64_t src, size_t count, fpga_dma_type type)
> +{
> +	if (ctx == NULL)
> +		return -EINVAL;
> +
> +	switch (type) {
> +	case HOST_TO_FPGA:
> +		return dma_host_to_fpga(ctx, dst, src, count);
> +	case FPGA_TO_HOST:
> +		return dma_fpga_to_host(ctx, dst, src, count);
> +	case FPGA_TO_FPGA:
> +		return dma_fpga_to_fpga(ctx, dst, src, count);
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
> +/* Return the time elapsed between 'start' and 'end', in seconds. */
> +static double getTime(struct timespec start, struct timespec end)
> +{
> +	uint64_t elapsed_ns;
> +
> +	elapsed_ns = 1000000000L * (end.tv_sec - start.tv_sec)
> +		+ end.tv_nsec - start.tv_nsec;
> +
> +	return (double)elapsed_ns / (double)1000000000L;
> +}
> +
> +#define SWEEP_ITERS 1
> +static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
> +	uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
> +{
> +	struct timespec start, end;
> +	uint64_t test_size = 0;
> +	uint64_t *dma_buf_ptr = NULL;
> +	double throughput, total_time = 0.0;
> +	int i = 0;
> +	int ret = 0;
> +
> +	if (!ctx || !ctx->data_buf || !ctx->ref_buf) {
> +		AFU_MF_PMD_ERR("Buffer for DMA test is not allocated");
> +		return -EINVAL;
> +	}
> +
> +	if (length < (buf_offset + size_decrement)) {
> +		AFU_MF_PMD_ERR("Test length does not match unaligned
> parameter");
> +		return -EINVAL;
> +	}
> +	test_size = length - (buf_offset + size_decrement);
> +	if ((ddr_offset + test_size) > ctx->mem_size) {
> +		AFU_MF_PMD_ERR("Test is out of DDR memory space");
> +		return -EINVAL;
> +	}
> +
> +	dma_buf_ptr = (uint64_t *)((uint8_t *)ctx->data_buf + buf_offset);
> +	printf("Sweep Host %p to FPGA 0x%"PRIx64
> +		" with 0x%"PRIx64" bytes ...\n",
> +		(void *)dma_buf_ptr, ddr_offset, test_size);
> +
> +	for (i = 0; i < SWEEP_ITERS; i++) {
> +		clock_gettime(CLOCK_MONOTONIC, &start);
> +		ret = dma_transfer_sync(ctx, ddr_offset, (uint64_t)dma_buf_ptr,
> +			test_size, HOST_TO_FPGA);
> +		clock_gettime(CLOCK_MONOTONIC, &end);
> +		if (ret) {
> +			AFU_MF_PMD_ERR("Failed");
> +			return ret;
> +		}
> +		total_time += getTime(start, end);
> +	}
> +	throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
> +	printf("Measured bandwidth = %lf MB/s\n", throughput);
> +
> +	printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n",
> +		ddr_offset, (void *)dma_buf_ptr, test_size);
> +
> +	total_time = 0.0;
> +	memset((char *)dma_buf_ptr, 0, test_size);
> +	for (i = 0; i < SWEEP_ITERS; i++) {
> +		clock_gettime(CLOCK_MONOTONIC, &start);
> +		ret = dma_transfer_sync(ctx, (uint64_t)dma_buf_ptr, ddr_offset,
> +			test_size, FPGA_TO_HOST);
> +		clock_gettime(CLOCK_MONOTONIC, &end);
> +		if (ret) {
> +			AFU_MF_PMD_ERR("Failed");
> +			return ret;
> +		}
> +		total_time += getTime(start, end);
> +	}
> +	throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
> +	printf("Measured bandwidth = %lf MB/s\n", throughput);
> +
> +	printf("Verifying buffer ...\n");
> +	return dma_afu_buf_verify(ctx, test_size);
> +}
> +
> +static int dma_afu_test(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct dma_afu_ctx *ctx = NULL;
> +	struct rte_pmd_afu_dma_cfg *cfg = NULL;
> +	msgdma_ctrl ctrl;
> +	uint64_t offset = 0;
> +	uint32_t i = 0;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv)
> +		return -ENOENT;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	cfg = &priv->dma_cfg;
> +	if (cfg->index >= NUM_N3000_DMA)
> +		return -EINVAL;
> +	ctx = &priv->dma_ctx[cfg->index];
> +
> +	ctx->pattern = (int)cfg->pattern;
> +	ctx->verbose = (int)cfg->verbose;
> +	ctx->dma_buf_size = cfg->size;
> +
> +	ret = dma_afu_buf_alloc(ctx, cfg);
> +	if (ret)
> +		goto free;
> +
> +	printf("Initialize test buffer\n");
> +	dma_afu_buf_init(ctx, cfg->length);
> +
> +	/* enable interrupt */
> +	ctrl.csr = 0;
> +	ctrl.global_intr_en_mask = 1;
> +	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
> +
> +	printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf,
> +		cfg->offset, cfg->length);
> +	ret = dma_transfer_sync(ctx, cfg->offset, (uint64_t)ctx->data_buf,
> +		cfg->length, HOST_TO_FPGA);
> +	if (ret) {
> +		AFU_MF_PMD_ERR("Failed to transfer data from host to FPGA");
> +		goto end;
> +	}
> +	memset(ctx->data_buf, 0, cfg->length);
> +
> +	printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
> +		ctx->data_buf, cfg->length);
> +	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, cfg->offset,
> +		cfg->length, FPGA_TO_HOST);
> +	if (ret) {
> +		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
> +		goto end;
> +	}
> +	ret = dma_afu_buf_verify(ctx, cfg->length);
> +	if (ret)
> +		goto end;
> +
> +	if ((cfg->offset + cfg->length * 2) <= ctx->mem_size)
> +		offset = cfg->offset + cfg->length;
> +	else if (cfg->offset > cfg->length)
> +		offset = 0;
> +	else
> +		goto end;
> +
> +	printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
> +		cfg->offset, offset, cfg->length);
> +	ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length,
> +		FPGA_TO_FPGA);
> +	if (ret) {
> +		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to FPGA");
> +		goto end;
> +	}
> +
> +	printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
> +		ctx->data_buf, cfg->length);
> +	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, offset,
> +		cfg->length, FPGA_TO_HOST);
> +	if (ret) {
> +		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
> +		goto end;
> +	}
> +	ret = dma_afu_buf_verify(ctx, cfg->length);
> +	if (ret)
> +		goto end;
> +
> +	printf("Sweep with aligned address and size\n");
> +	ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
> +	if (ret)
> +		goto end;
> +
> +	if (cfg->unaligned) {
> +		printf("Sweep with unaligned address and size\n");
> +		struct unaligned_set {
> +			uint64_t addr_offset;
> +			uint64_t size_dec;
> +		} param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
> +		for (i = 0; i < ARRAY_SIZE(param); i++) {
> +			ret = sweep_test(ctx, cfg->length, cfg->offset,
> +				param[i].addr_offset, param[i].size_dec);
> +			if (ret)
> +				break;
> +		}
> +	}
> +
> +end:
> +	/* disable interrupt */
> +	ctrl.global_intr_en_mask = 0;
> +	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
> +
> +free:
> +	dma_afu_buf_free(ctx);
> +	return ret;
> +}
> +
> +static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_mf_rawdev *dev)
> +{
> +	struct rte_afu_device *afudev = NULL;
> +
> +	if (!dev || !dev->rawdev || !dev->rawdev->device)
> +		return NULL;
> +
> +	afudev = RTE_DEV_TO_AFU(dev->rawdev->device);
> +	if (!afudev->rawdev || !afudev->rawdev->device)
> +		return NULL;
> +
> +	return RTE_DEV_TO_PCI(afudev->rawdev->device);
> +}
> +
> +#ifdef VFIO_PRESENT
> +static int dma_afu_set_irqs(struct afu_mf_rawdev *dev, uint32_t vec_start,
> +	uint32_t count, int *efds)
> +{
> +	struct rte_pci_device *pci_dev = NULL;
> +	struct vfio_irq_set *irq_set = NULL;
> +	int vfio_dev_fd = 0;
> +	size_t sz = 0;
> +	int ret = 0;
> +
> +	if (!dev || !efds || (count == 0) || (count > MAX_MSIX_VEC))
> +		return -EINVAL;
> +
> +	pci_dev = n3000_afu_get_pci_dev(dev);
> +	if (!pci_dev)
> +		return -ENODEV;
> +	vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
> +
> +	sz = sizeof(*irq_set) + sizeof(*efds) * count;
> +	irq_set = rte_zmalloc(NULL, sz, 0);
> +	if (!irq_set)
> +		return -ENOMEM;
> +
> +	irq_set->argsz = (uint32_t)sz;
> +	irq_set->count = count;
> +	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
> +		VFIO_IRQ_SET_ACTION_TRIGGER;
> +	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> +	irq_set->start = vec_start;
> +
> +	rte_memcpy(&irq_set->data, efds, sizeof(*efds) * count);
> +	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> +	if (ret)
> +		AFU_MF_PMD_ERR("Error enabling MSI-X interrupts\n");
> +
> +	rte_free(irq_set);
> +	return ret;
> +}
> +#endif
> +
> +static void *n3000_afu_get_port_addr(struct afu_mf_rawdev *dev)
> +{
> +	struct rte_pci_device *pci_dev = NULL;
> +	uint8_t *addr = NULL;
> +	uint64_t val = 0;
> +	uint32_t bar = 0;
> +
> +	pci_dev = n3000_afu_get_pci_dev(dev);
> +	if (!pci_dev)
> +		return NULL;
> +
> +	addr = (uint8_t *)pci_dev->mem_resource[0].addr;
> +	val = rte_read64(addr + PORT_ATTR_REG(dev->port));
> +	if (!PORT_IMPLEMENTED(val)) {
> +		AFU_MF_PMD_INFO("FIU port %d is not implemented", dev->port);
> +		return NULL;
> +	}
> +
> +	bar = PORT_BAR(val);
> +	if (bar >= PCI_MAX_RESOURCE) {
> +		AFU_MF_PMD_ERR("BAR index %u is out of limit", bar);
> +		return NULL;
> +	}
> +
> +	addr = (uint8_t *)pci_dev->mem_resource[bar].addr + PORT_OFFSET(val);
> +	return addr;
> +}
> +
> +static int n3000_afu_get_irq_capability(struct afu_mf_rawdev *dev,
> +	uint32_t *vec_start, uint32_t *vec_count)
> +{
> +	uint8_t *addr = NULL;
> +	uint64_t val = 0;
> +	uint64_t header = 0;
> +	uint64_t next_offset = 0;
> +
> +	addr = (uint8_t *)n3000_afu_get_port_addr(dev);
> +	if (!addr)
> +		return -ENOENT;
> +
> +	do {
> +		addr += next_offset;
> +		header = rte_read64(addr);
> +		if ((DFH_TYPE(header) == DFH_TYPE_PRIVATE) &&
> +			(DFH_FEATURE_ID(header) == PORT_FEATURE_UINT_ID)) {
> +			val = rte_read64(addr + PORT_UINT_CAP_REG);
> +			if (vec_start)
> +				*vec_start = PORT_VEC_START(val);
> +			if (vec_count)
> +				*vec_count = PORT_VEC_COUNT(val);
> +			return 0;
> +		}
> +		next_offset = DFH_NEXT_OFFSET(header);
> +		if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
> +			break;
> +	} while (!DFH_EOL(header));
> +
> +	return -ENOENT;
> +}
> +
> +static int nlb_afu_ctx_release(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct nlb_afu_ctx *ctx = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	ctx = &priv->nlb_ctx;
> +
> +	rte_free(ctx->dsm_ptr);
> +	ctx->dsm_ptr = NULL;
> +	ctx->status_ptr = NULL;
> +
> +	rte_free(ctx->src_ptr);
> +	ctx->src_ptr = NULL;
> +
> +	rte_free(ctx->dest_ptr);
> +	ctx->dest_ptr = NULL;
> +
> +	return 0;
> +}
> +
> +static int nlb_afu_ctx_init(struct afu_mf_rawdev *dev, uint8_t *addr)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct nlb_afu_ctx *ctx = NULL;
> +	int ret = 0;
> +
> +	if (!dev || !addr)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	ctx = &priv->nlb_ctx;
> +	ctx->addr = addr;
> +
> +	ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
> +	if (!ctx->dsm_ptr) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +	ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr);
> +	if (ctx->dsm_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
> +		TEST_MEM_ALIGN);
> +	if (!ctx->src_ptr) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +	ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr);
> +	if (ctx->src_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
> +		TEST_MEM_ALIGN);
> +	if (!ctx->dest_ptr) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +	ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr);
> +	if (ctx->dest_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->status_ptr = (struct nlb_dsm_status *)(ctx->dsm_ptr + DSM_STATUS);
> +	return 0;
> +
> +release:
> +	nlb_afu_ctx_release(dev);
> +	return ret;
> +}
> +
> +static int dma_afu_ctx_release(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct dma_afu_ctx *ctx = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	ctx = &priv->dma_ctx[0];
> +
> +	rte_free(ctx->desc_buf);
> +	ctx->desc_buf = NULL;
> +
> +	rte_free(ctx->magic_buf);
> +	ctx->magic_buf = NULL;
> +
> +	close(ctx->event_fd);
> +	return 0;
> +}
> +
> +static int dma_afu_ctx_init(struct afu_mf_rawdev *dev, int index, uint8_t *addr)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct dma_afu_ctx *ctx = NULL;
> +	uint64_t mem_sz[] = {0x100000000, 0x100000000, 0x40000000, 0x1000000};
> +	static int efds[1] = {0};
> +	uint32_t vec_start = 0;
> +	int ret = 0;
> +
> +	if (!dev || (index < 0) || (index >= NUM_N3000_DMA) || !addr)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	ctx = &priv->dma_ctx[index];
> +	ctx->index = index;
> +	ctx->addr = addr;
> +	ctx->csr_addr = addr + DMA_CSR;
> +	ctx->desc_addr = addr + DMA_DESC;
> +	ctx->ase_ctrl_addr = addr + DMA_ASE_CTRL;
> +	ctx->ase_data_addr = addr + DMA_ASE_DATA;
> +	ctx->mem_size = mem_sz[ctx->index];
> +	ctx->cur_ase_page = INVALID_ASE_PAGE;
> +	if (ctx->index == 0) {
> +		ret = n3000_afu_get_irq_capability(dev, &vec_start, NULL);
> +		if (ret)
> +			return ret;
> +
> +		efds[0] = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +		if (efds[0] < 0) {
> +			AFU_MF_PMD_ERR("eventfd create failed");
> +			return -EBADF;
> +		}
> +#ifdef VFIO_PRESENT
> +		if (dma_afu_set_irqs(dev, vec_start, 1, efds))
> +			AFU_MF_PMD_ERR("DMA interrupt setup failed");
> +#endif
> +	}
> +	ctx->event_fd = efds[0];
> +
> +	ctx->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
> +		sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
> +	if (!ctx->desc_buf) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
> +		TEST_MEM_ALIGN);
> +	if (!ctx->magic_buf) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +	ctx->magic_iova = rte_malloc_virt2iova(ctx->magic_buf);
> +	if (ctx->magic_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	return 0;
> +
> +release:
> +	dma_afu_ctx_release(dev);
> +	return ret;
> +}
> +
> +static int n3000_afu_ctx_init(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	uint8_t *addr = NULL;
> +	uint64_t header = 0;
> +	uint64_t uuid_hi = 0;
> +	uint64_t uuid_lo = 0;
> +	uint64_t next_offset = 0;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	addr = (uint8_t *)dev->addr;
> +	do {
> +		addr += next_offset;
> +		header = rte_read64(addr);
> +		uuid_lo = rte_read64(addr + DFH_UUID_L_OFFSET);
> +		uuid_hi = rte_read64(addr + DFH_UUID_H_OFFSET);
> +
> +		if ((DFH_TYPE(header) == DFH_TYPE_AFU) &&
> +			(uuid_lo == N3000_NLB0_UUID_L) &&
> +			(uuid_hi == N3000_NLB0_UUID_H)) {
> +			AFU_MF_PMD_INFO("AFU NLB0 found @ %p", (void *)addr);
> +			ret = nlb_afu_ctx_init(dev, addr);
> +			if (ret)
> +				return ret;
> +		} else if ((DFH_TYPE(header) == DFH_TYPE_BBB) &&
> +			(uuid_lo == N3000_DMA_UUID_L) &&
> +			(uuid_hi == N3000_DMA_UUID_H) &&
> +			(priv->num_dma < NUM_N3000_DMA)) {
> +			AFU_MF_PMD_INFO("AFU DMA%d found @ %p",
> +				priv->num_dma, (void *)addr);
> +			ret = dma_afu_ctx_init(dev, priv->num_dma, addr);
> +			if (ret)
> +				return ret;
> +			priv->num_dma++;
> +		} else {
> +			AFU_MF_PMD_DEBUG("DFH: type %"PRIu64
> +				", uuid %016"PRIx64"%016"PRIx64,
> +				DFH_TYPE(header), uuid_hi, uuid_lo);
> +		}
> +
> +		next_offset = DFH_NEXT_OFFSET(header);
> +		if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
> +			break;
> +	} while (!DFH_EOL(header));
> +
> +	return 0;
> +}
> +
> +static int n3000_afu_init(struct afu_mf_rawdev *dev)
> +{
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv) {
> +		dev->priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0);
> +		if (!dev->priv)
> +			return -ENOMEM;
> +	}
> +
> +	return n3000_afu_ctx_init(dev);
> +}
> +
> +static int n3000_afu_config(struct afu_mf_rawdev *dev, void *config,
> +	size_t config_size)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct rte_pmd_afu_n3000_cfg *cfg = NULL;
> +	int i = 0;
> +	uint64_t top = 0;
> +
> +	if (!dev || !config || !config_size)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
> +		return -EINVAL;
> +
> +	cfg = (struct rte_pmd_afu_n3000_cfg *)config;
> +	if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
> +		if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
> +			return -EINVAL;
> +		if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
> +			(cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
> +			return -EINVAL;
> +		if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
> +			return -EINVAL;
> +		if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
> +			return -EINVAL;
> +		if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
> +			return -EINVAL;
> +		if ((cfg->nlb_cfg.multi_cl != 1) &&
> +			(cfg->nlb_cfg.multi_cl != 2) &&
> +			(cfg->nlb_cfg.multi_cl != 4))
> +			return -EINVAL;
> +		if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
> +			(cfg->nlb_cfg.begin > MAX_CACHE_LINES))
> +			return -EINVAL;
> +		if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
> +			(cfg->nlb_cfg.end > MAX_CACHE_LINES))
> +			return -EINVAL;
> +		rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
> +			sizeof(struct rte_pmd_afu_nlb_cfg));
> +	} else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
> +		if (cfg->dma_cfg.index >= NUM_N3000_DMA)
> +			return -EINVAL;
> +		i = cfg->dma_cfg.index;
> +		if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
> +			return -EINVAL;
> +		if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
> +			return -EINVAL;
> +		top = cfg->dma_cfg.length + cfg->dma_cfg.offset;
> +		if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
> +			return -EINVAL;
> +		if (i == 3) {  /* QDR connected to DMA3 */
> +			if (cfg->dma_cfg.length & 0x3f) {
> +				cfg->dma_cfg.length &= ~0x3f;
> +				AFU_MF_PMD_INFO("Round size to %x for QDR",
> +					cfg->dma_cfg.length);
> +			}
> +		}
> +		rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
> +			sizeof(struct rte_pmd_afu_dma_cfg));
> +	} else {
> +		AFU_MF_PMD_ERR("Invalid type of N3000 AFU");
> +		return -EINVAL;
> +	}
> +
> +	priv->cfg_type = cfg->type;
> +	return 0;
> +}
> +
> +static int n3000_afu_test(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv)
> +		return -ENOENT;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +
> +	if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
> +		AFU_MF_PMD_INFO("Test NLB");
> +		ret = nlb_afu_test(dev);
> +	} else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
> +		AFU_MF_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
> +		ret = dma_afu_test(dev);
> +	} else {
> +		AFU_MF_PMD_ERR("Please configure AFU before test");
> +		ret = -EINVAL;
> +	}
> +
> +	return ret;
> +}
> +
> +static int n3000_afu_close(struct afu_mf_rawdev *dev)
> +{
> +	if (!dev)
> +		return -EINVAL;
> +
> +	nlb_afu_ctx_release(dev);
> +	dma_afu_ctx_release(dev);
> +
> +	rte_free(dev->priv);
> +	dev->priv = NULL;
> +
> +	return 0;
> +}
> +
> +static int n3000_afu_dump(struct afu_mf_rawdev *dev, FILE *f)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	if (!f)
> +		f = stdout;
> +
> +	if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
> +		struct nlb_afu_ctx *ctx = &priv->nlb_ctx;
> +		fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
> +		fprintf(f, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
> +		fprintf(f, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
> +		fprintf(f, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
> +		fprintf(f, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
> +		fprintf(f, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
> +		fprintf(f, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
> +		fprintf(f, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
> +	} else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
> +		struct dma_afu_ctx *ctx = &priv->dma_ctx[priv->dma_cfg.index];
> +		fprintf(f, "index:\t\t%d\n", ctx->index);
> +		fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
> +		fprintf(f, "csr_addr:\t%p\n", (void *)ctx->csr_addr);
> +		fprintf(f, "desc_addr:\t%p\n", (void *)ctx->desc_addr);
> +		fprintf(f, "ase_ctrl_addr:\t%p\n", (void *)ctx->ase_ctrl_addr);
> +		fprintf(f, "ase_data_addr:\t%p\n", (void *)ctx->ase_data_addr);
> +		fprintf(f, "desc_buf:\t%p\n", (void *)ctx->desc_buf);
> +		fprintf(f, "magic_buf:\t%p\n", (void *)ctx->magic_buf);
> +		fprintf(f, "magic_iova:\t0x%"PRIx64"\n", ctx->magic_iova);
> +	} else {
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static int n3000_afu_reset(struct afu_mf_rawdev *dev)
> +{
> +	uint8_t *addr = NULL;
> +	uint64_t val = 0;
> +
> +	addr = (uint8_t *)n3000_afu_get_port_addr(dev);
> +	if (!addr)
> +		return -ENOENT;
> +
> +	val = rte_read64(addr + PORT_CTRL_REG);
> +	val |= PORT_SOFT_RESET;
> +	rte_write64(val, addr + PORT_CTRL_REG);
> +	rte_delay_us(100);
> +	val &= ~PORT_SOFT_RESET;
> +	rte_write64(val, addr + PORT_CTRL_REG);
> +
> +	return 0;
> +}
> +
> +static struct afu_mf_ops n3000_afu_ops = {
> +	.init = n3000_afu_init,
> +	.config = n3000_afu_config,
> +	.start = NULL,
> +	.stop = NULL,
> +	.test = n3000_afu_test,
> +	.close = n3000_afu_close,
> +	.dump = n3000_afu_dump,
> +	.reset = n3000_afu_reset
> +};
> +
> +struct afu_mf_drv n3000_afu_drv = {
> +	.uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
> +	.ops = &n3000_afu_ops
> +};
> diff --git a/drivers/raw/afu_mf/n3000_afu.h b/drivers/raw/afu_mf/n3000_afu.h
> new file mode 100644
> index 0000000..4c740da
> --- /dev/null
> +++ b/drivers/raw/afu_mf/n3000_afu.h
> @@ -0,0 +1,333 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _N3000_AFU_H_
> +#define _N3000_AFU_H_
> +
> +#include "afu_mf_rawdev.h"
> +#include "rte_pmd_afu.h"
> +
> +#define N3000_AFU_UUID_L  0xc000c9660d824272
> +#define N3000_AFU_UUID_H  0x9aeffe5f84570612
> +#define N3000_NLB0_UUID_L 0xf89e433683f9040b
> +#define N3000_NLB0_UUID_H 0xd8424dc4a4a3c413
> +#define N3000_DMA_UUID_L  0xa9149a35bace01ea
> +#define N3000_DMA_UUID_H  0xef82def7f6ec40fc
> +
> +extern struct afu_mf_drv n3000_afu_drv;
> +
> +#define NUM_N3000_DMA  4
> +#define MAX_MSIX_VEC   7
> +
> +/* N3000 DFL definition */
> +#define DFH_UUID_L_OFFSET  8
> +#define DFH_UUID_H_OFFSET  16
> +#define DFH_TYPE(hdr)  (((hdr) >> 60) & 0xf)
> +#define DFH_TYPE_AFU  1
> +#define DFH_TYPE_BBB  2
> +#define DFH_TYPE_PRIVATE  3
> +#define DFH_EOL(hdr)  (((hdr) >> 40) & 0x1)
> +#define DFH_NEXT_OFFSET(hdr)  (((hdr) >> 16) & 0xffffff)
> +#define DFH_FEATURE_ID(hdr)  ((hdr) & 0xfff)
> +#define PORT_ATTR_REG(n)  (((n) << 3) + 0x38)
> +#define PORT_IMPLEMENTED(attr)  (((attr) >> 60) & 0x1)
> +#define PORT_BAR(attr)  (((attr) >> 32) & 0x7)
> +#define PORT_OFFSET(attr)  ((attr) & 0xffffff)
> +#define PORT_FEATURE_UINT_ID  0x12
> +#define PORT_UINT_CAP_REG  0x8
> +#define PORT_VEC_START(cap)  (((cap) >> 12) & 0xfff)
> +#define PORT_VEC_COUNT(cap)  ((cap) >> 12 & 0xfff)
> +#define PORT_CTRL_REG  0x38
> +#define PORT_SOFT_RESET  (0x1 << 0)
> +
> +/* NLB registers definition */
> +#define CSR_SCRATCHPAD0    0x100
> +#define CSR_SCRATCHPAD1    0x108
> +#define CSR_AFU_DSM_BASEL  0x110
> +#define CSR_AFU_DSM_BASEH  0x114
> +#define CSR_SRC_ADDR       0x120
> +#define CSR_DST_ADDR       0x128
> +#define CSR_NUM_LINES      0x130
> +#define CSR_CTL            0x138
> +#define CSR_CFG            0x140
> +#define CSR_INACT_THRESH   0x148
> +#define CSR_INTERRUPT0     0x150
> +#define CSR_SWTEST_MSG     0x158
> +#define CSR_STATUS0        0x160
> +#define CSR_STATUS1        0x168
> +#define CSR_ERROR          0x170
> +#define CSR_STRIDE         0x178
> +#define CSR_HE_INFO0       0x180
> +
> +#define DSM_SIZE           0x200000
> +#define DSM_STATUS         0x40
> +#define DSM_POLL_INTERVAL  5  /* ms */
> +#define DSM_TIMEOUT        1000  /* ms */
> +
> +#define NLB_BUF_SIZE  0x400000
> +#define TEST_MEM_ALIGN  1024
> +
> +struct nlb_csr_ctl {
> +	union {
> +		uint32_t csr;
> +		struct {
> +			uint32_t reset:1;
> +			uint32_t start:1;
> +			uint32_t force_completion:1;
> +			uint32_t reserved:29;
> +		};
> +	};
> +};
> +
> +struct nlb_csr_cfg {
> +	union {
> +		uint32_t csr;
> +		struct {
> +			uint32_t wrthru_en:1;
> +			uint32_t cont:1;
> +			uint32_t mode:3;
> +			uint32_t multicl_len:2;
> +			uint32_t rsvd1:1;
> +			uint32_t delay_en:1;
> +			uint32_t rdsel:2;
> +			uint32_t rsvd2:1;
> +			uint32_t chsel:3;
> +			uint32_t rsvd3:1;
> +			uint32_t wrpush_i:1;
> +			uint32_t wr_chsel:3;
> +			uint32_t rsvd4:3;
> +			uint32_t test_cfg:5;
> +			uint32_t interrupt_on_error:1;
> +			uint32_t interrupt_testmode:1;
> +			uint32_t wrfence_chsel:2;
> +		};
> +	};
> +};
> +
> +struct nlb_status0 {
> +	union {
> +		uint64_t csr;
> +		struct {
> +			uint32_t num_writes;
> +			uint32_t num_reads;
> +		};
> +	};
> +};
> +
> +struct nlb_status1 {
> +	union {
> +		uint64_t csr;
> +		struct {
> +			uint32_t num_pend_writes;
> +			uint32_t num_pend_reads;
> +		};
> +	};
> +};
> +
> +struct nlb_dsm_status {
> +	uint32_t test_complete;
> +	uint32_t test_error;
> +	uint64_t num_clocks;
> +	uint32_t num_reads;
> +	uint32_t num_writes;
> +	uint32_t start_overhead;
> +	uint32_t end_overhead;
> +};
> +
> +/* DMA registers definition */
> +#define DMA_CSR       0x40
> +#define DMA_DESC      0x60
> +#define DMA_ASE_CTRL  0x200
> +#define DMA_ASE_DATA  0x1000
> +
> +#define DMA_ASE_WINDOW       4096
> +#define DMA_ASE_WINDOW_MASK  ((uint64_t)(DMA_ASE_WINDOW - 1))
> +#define INVALID_ASE_PAGE     0xffffffffffffffffULL
> +
> +#define DMA_WF_MAGIC             0x5772745F53796E63ULL
> +#define DMA_WF_MAGIC_ROM         0x1000000000000
> +#define DMA_HOST_ADDR(addr)      ((addr) | 0x2000000000000)
> +#define DMA_WF_HOST_ADDR(addr)   ((addr) | 0x3000000000000)
> +
> +#define NUM_DMA_BUF   8
> +#define HALF_DMA_BUF  (NUM_DMA_BUF / 2)
> +
> +#define DMA_MASK_32_BIT 0xFFFFFFFF
> +
> +#define DMA_CSR_BUSY           0x1
> +#define DMA_DESC_BUFFER_EMPTY  0x2
> +#define DMA_DESC_BUFFER_FULL   0x4
> +
> +#define DWORD_BYTES 4
> +#define IS_ALIGNED_DWORD(addr) (((addr) % DWORD_BYTES) == 0)
> +
> +#define QWORD_BYTES 8
> +#define IS_ALIGNED_QWORD(addr) (((addr) % QWORD_BYTES) == 0)
> +
> +#define DMA_ALIGN_BYTES 64
> +#define IS_DMA_ALIGNED(addr) (((addr) % DMA_ALIGN_BYTES) == 0)
> +
> +#define CCIP_ALIGN_BYTES (DMA_ALIGN_BYTES << 2)
> +
> +#define DMA_TIMEOUT_MSEC  5000
> +
> +#define MAGIC_BUF_SIZE  64
> +#define ERR_CHECK_LIMIT  64
> +
> +#ifndef MIN
> +#define MIN(a, b) ((a) < (b) ? (a) : (b))
> +#endif
> +
> +#ifndef ARRAY_SIZE
> +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
> +#endif
> +
> +typedef enum {
> +	HOST_TO_FPGA = 0,
> +	FPGA_TO_HOST,
> +	FPGA_TO_FPGA,
> +	FPGA_MAX_TRANSFER_TYPE,
> +} fpga_dma_type;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t tx_channel:8;
> +		uint32_t generate_sop:1;
> +		uint32_t generate_eop:1;
> +		uint32_t park_reads:1;
> +		uint32_t park_writes:1;
> +		uint32_t end_on_eop:1;
> +		uint32_t reserved_1:1;
> +		uint32_t transfer_irq_en:1;
> +		uint32_t early_term_irq_en:1;
> +		uint32_t trans_error_irq_en:8;
> +		uint32_t early_done_en:1;
> +		uint32_t reserved_2:6;
> +		uint32_t go:1;
> +	};
> +} msgdma_desc_ctrl;
> +
> +typedef struct __rte_packed {
> +	uint32_t rd_address;
> +	uint32_t wr_address;
> +	uint32_t len;
> +	uint16_t seq_num;
> +	uint8_t rd_burst_count;
> +	uint8_t wr_burst_count;
> +	uint16_t rd_stride;
> +	uint16_t wr_stride;
> +	uint32_t rd_address_ext;
> +	uint32_t wr_address_ext;
> +	msgdma_desc_ctrl control;
> +} msgdma_ext_desc;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t busy:1;
> +		uint32_t desc_buf_empty:1;
> +		uint32_t desc_buf_full:1;
> +		uint32_t rsp_buf_empty:1;
> +		uint32_t rsp_buf_full:1;
> +		uint32_t stopped:1;
> +		uint32_t resetting:1;
> +		uint32_t stopped_on_error:1;
> +		uint32_t stopped_on_early_term:1;
> +		uint32_t irq:1;
> +		uint32_t reserved:22;
> +	};
> +} msgdma_status;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t stop_dispatcher:1;
> +		uint32_t reset_dispatcher:1;
> +		uint32_t stop_on_error:1;
> +		uint32_t stopped_on_early_term:1;
> +		uint32_t global_intr_en_mask:1;
> +		uint32_t stop_descriptors:1;
> +		uint32_t reserved:22;
> +	};
> +} msgdma_ctrl;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t rd_fill_level:16;
> +		uint32_t wr_fill_level:16;
> +	};
> +} msgdma_fill_level;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t rsp_fill_level:16;
> +		uint32_t reserved:16;
> +	};
> +} msgdma_rsp_level;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t rd_seq_num:16;
> +		uint32_t wr_seq_num:16;
> +	};
> +} msgdma_seq_num;
> +
> +typedef struct __rte_packed {
> +	msgdma_status status;
> +	msgdma_ctrl ctrl;
> +	msgdma_fill_level fill_level;
> +	msgdma_rsp_level rsp;
> +	msgdma_seq_num seq_num;
> +} msgdma_csr;
> +
> +#define CSR_STATUS(csr)   (&(((msgdma_csr *)(csr))->status))
> +#define CSR_CONTROL(csr)  (&(((msgdma_csr *)(csr))->ctrl))
> +
> +struct nlb_afu_ctx {
> +	uint8_t *addr;
> +	uint8_t *dsm_ptr;
> +	uint64_t dsm_iova;
> +	uint8_t *src_ptr;
> +	uint64_t src_iova;
> +	uint8_t *dest_ptr;
> +	uint64_t dest_iova;
> +	struct nlb_dsm_status *status_ptr;
> +};
> +
> +struct dma_afu_ctx {
> +	int index;
> +	uint8_t *addr;
> +	uint8_t *csr_addr;
> +	uint8_t *desc_addr;
> +	uint8_t *ase_ctrl_addr;
> +	uint8_t *ase_data_addr;
> +	uint64_t mem_size;
> +	uint64_t cur_ase_page;
> +	int event_fd;
> +	int verbose;
> +	int pattern;
> +	void *data_buf;
> +	void *ref_buf;
> +	msgdma_ext_desc *desc_buf;
> +	uint64_t *magic_buf;
> +	uint64_t magic_iova;
> +	uint32_t dma_buf_size;
> +	uint64_t *dma_buf[NUM_DMA_BUF];
> +	uint64_t dma_iova[NUM_DMA_BUF];
> +};
> +
> +struct n3000_afu_priv {
> +	struct rte_pmd_afu_nlb_cfg nlb_cfg;
> +	struct rte_pmd_afu_dma_cfg dma_cfg;
> +	struct nlb_afu_ctx nlb_ctx;
> +	struct dma_afu_ctx dma_ctx[NUM_N3000_DMA];
> +	int num_dma;
> +	int cfg_type;
> +};
> +
> +#endif /* _N3000_AFU_H_ */
> diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h
> new file mode 100644
> index 0000000..89d866a
> --- /dev/null
> +++ b/drivers/raw/afu_mf/rte_pmd_afu.h
> @@ -0,0 +1,134 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2022 Intel Corporation
> + */
> +
> +#ifndef __RTE_PMD_AFU_H__
> +#define __RTE_PMD_AFU_H__
> +
> +/**
> + * @file rte_pmd_afu.h
> + *
> + * AFU PMD specific definitions.
> + *
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + */
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <stdint.h>
> +
> +#define RTE_PMD_AFU_N3000_NLB   1
> +#define RTE_PMD_AFU_N3000_DMA   2
> +
> +#define NLB_MODE_LPBK      0
> +#define NLB_MODE_READ      1
> +#define NLB_MODE_WRITE     2
> +#define NLB_MODE_TRPUT     3
> +
> +#define NLB_VC_AUTO        0
> +#define NLB_VC_VL0         1
> +#define NLB_VC_VH0         2
> +#define NLB_VC_VH1         3
> +#define NLB_VC_RANDOM      4
> +
> +#define NLB_WRLINE_M       0
> +#define NLB_WRLINE_I       1
> +#define NLB_WRPUSH_I       2
> +
> +#define NLB_RDLINE_S       0
> +#define NLB_RDLINE_I       1
> +#define NLB_RDLINE_MIXED   2
> +
> +#define MIN_CACHE_LINES   1
> +#define MAX_CACHE_LINES   1024
> +
> +#define MIN_DMA_BUF_SIZE  64
> +#define MAX_DMA_BUF_SIZE  (1023 * 1024)
> +
> +/**
> + * NLB AFU configuration data structure.
> + */
> +struct rte_pmd_afu_nlb_cfg {
> +	uint32_t mode;
> +	uint32_t begin;
> +	uint32_t end;
> +	uint32_t multi_cl;
> +	uint32_t cont;
> +	uint32_t timeout;
> +	uint32_t cache_policy;
> +	uint32_t cache_hint;
> +	uint32_t read_vc;
> +	uint32_t write_vc;
> +	uint32_t wrfence_vc;
> +	uint32_t freq_mhz;
> +};
> +
> +/**
> + * DMA AFU configuration data structure.
> + */
> +struct rte_pmd_afu_dma_cfg {
> +	uint32_t index;     /* index of DMA controller */
> +	uint32_t length;    /* total length of data to DMA */
> +	uint32_t offset;    /* address offset of target memory */
> +	uint32_t size;      /* size of transfer buffer */
> +	uint32_t pattern;   /* data pattern to fill in test buffer */
> +	uint32_t unaligned; /* use unaligned address or length in sweep test */
> +	uint32_t verbose;   /* enable verbose error information in test */
> +};
> +
> +/**
> + * N3000 AFU configuration data structure.
> + */
> +struct rte_pmd_afu_n3000_cfg {
> +	int type;   /* RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA */
> +	union {
> +		struct rte_pmd_afu_nlb_cfg nlb_cfg;
> +		struct rte_pmd_afu_dma_cfg dma_cfg;
> +	};
> +};
> +
> +/**
> + * HE-LBK & HE-MEM-LBK AFU configuration data structure.
> + */
> +struct rte_pmd_afu_he_lbk_cfg {
> +	uint32_t mode;
> +	uint32_t begin;
> +	uint32_t end;
> +	uint32_t multi_cl;
> +	uint32_t cont;
> +	uint32_t timeout;
> +	uint32_t trput_interleave;
> +	uint32_t freq_mhz;
> +};
> +
> +/**
> + * HE-MEM-TG AFU configuration data structure.
> + */
> +struct rte_pmd_afu_he_mem_tg_cfg {
> +	uint32_t channel_mask;   /* mask of traffic generator channel */
> +};
> +
> +/**
> + * HE-HSSI AFU configuration data structure.
> + */
> +struct rte_pmd_afu_he_hssi_cfg {
> +	uint32_t port;
> +	uint32_t timeout;
> +	uint32_t num_packets;
> +	uint32_t random_length;
> +	uint32_t packet_length;
> +	uint32_t random_payload;
> +	uint32_t rnd_seed[3];
> +	uint64_t src_addr;
> +	uint64_t dest_addr;
> +	int he_loopback;
> +};
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* __RTE_PMD_AFU_H__ */
> diff --git a/drivers/raw/afu_mf/version.map b/drivers/raw/afu_mf/version.map
> new file mode 100644
> index 0000000..c2e0723
> --- /dev/null
> +++ b/drivers/raw/afu_mf/version.map
> @@ -0,0 +1,3 @@
> +DPDK_22 {
> +	local: *;
> +};
> diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build
> index 05e7de1..c3627f7 100644
> --- a/drivers/raw/meson.build
> +++ b/drivers/raw/meson.build
> @@ -6,6 +6,7 @@ if is_windows
>  endif
> 
>  drivers = [
> +        'afu_mf',
>          'cnxk_bphy',
>          'cnxk_gpio',
>          'dpaa2_cmdif',
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v5 0/5] introduce afu_mf raw device driver
  2022-05-19  5:52     ` [PATCH v4] " Wei Huang
  2022-05-26  6:51       ` Xu, Rosen
@ 2022-05-27  5:36       ` Wei Huang
  2022-05-27  5:37         ` [PATCH v5 1/5] drivers/raw: introduce AFU " Wei Huang
                           ` (6 more replies)
  1 sibling, 7 replies; 57+ messages in thread
From: Wei Huang @ 2022-05-27  5:36 UTC (permalink / raw)
  To: dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, rosen.xu, tianfei.zhang, qi.z.zhang, Wei Huang

The first patch implements the framework of the AFU raw device driver.
The subsequent patches implement drivers for individual AFUs.

Wei Huang (5):
  drivers/raw: introduce AFU raw device driver
  raw/afu_mf: add N3000 AFU driver
  raw/afu_mf: add HE-LBK AFU driver
  raw/afu_mf: add HE-MEM AFU driver
  raw/afu_mf: add HE-HSSI AFU driver

 drivers/raw/afu_mf/afu_mf_rawdev.c |  440 ++++++++
 drivers/raw/afu_mf/afu_mf_rawdev.h |   89 ++
 drivers/raw/afu_mf/he_hssi.c       |  369 +++++++
 drivers/raw/afu_mf/he_hssi.h       |  102 ++
 drivers/raw/afu_mf/he_lbk.c        |  427 ++++++++
 drivers/raw/afu_mf/he_lbk.h        |  121 +++
 drivers/raw/afu_mf/he_mem.c        |  181 ++++
 drivers/raw/afu_mf/he_mem.h        |   40 +
 drivers/raw/afu_mf/meson.build     |    8 +
 drivers/raw/afu_mf/n3000_afu.c     | 2005 ++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
 drivers/raw/afu_mf/rte_pmd_afu.h   |  134 +++
 drivers/raw/afu_mf/version.map     |    3 +
 drivers/raw/meson.build            |    1 +
 14 files changed, 4253 insertions(+)
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
 create mode 100644 drivers/raw/afu_mf/he_hssi.c
 create mode 100644 drivers/raw/afu_mf/he_hssi.h
 create mode 100644 drivers/raw/afu_mf/he_lbk.c
 create mode 100644 drivers/raw/afu_mf/he_lbk.h
 create mode 100644 drivers/raw/afu_mf/he_mem.c
 create mode 100644 drivers/raw/afu_mf/he_mem.h
 create mode 100644 drivers/raw/afu_mf/meson.build
 create mode 100644 drivers/raw/afu_mf/n3000_afu.c
 create mode 100644 drivers/raw/afu_mf/n3000_afu.h
 create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h
 create mode 100644 drivers/raw/afu_mf/version.map

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
  2022-05-27  5:36       ` [PATCH v5 0/5] introduce afu_mf raw " Wei Huang
@ 2022-05-27  5:37         ` Wei Huang
  2022-06-06  1:52           ` Zhang, Tianfei
  2022-06-06 15:38           ` Stephen Hemminger
  2022-05-27  5:37         ` [PATCH v5 2/5] raw/afu_mf: add N3000 AFU driver Wei Huang
                           ` (5 subsequent siblings)
  6 siblings, 2 replies; 57+ messages in thread
From: Wei Huang @ 2022-05-27  5:37 UTC (permalink / raw)
  To: dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, rosen.xu, tianfei.zhang, qi.z.zhang, Wei Huang

Add a multi-function AFU raw device driver to manage various AFUs
(Acceleration Function Units) in an FPGA.
This driver implements common AFU raw device interfaces and
exposes them to applications as standard raw device APIs.
A normal application can operate a specified AFU as follows:
1. call rte_rawdev_pmd_get_named_dev() to find AFU raw device.
2. call rte_rawdev_configure() to initialize AFU raw device.
3. call rte_rawdev_selftest() to test function of AFU.

Signed-off-by: Wei Huang <wei.huang@intel.com>
---
v2: fix typo
---
v3: fix build error in FreeBSD13-64, UB2004-32 and UB2204-32
---
v4: fix coding style issue and build error in FreeBSD13-64
---
v5: split patch into several patches
---
 drivers/raw/afu_mf/afu_mf_rawdev.c | 425 +++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/afu_mf_rawdev.h |  71 +++++++
 drivers/raw/afu_mf/meson.build     |   5 +
 drivers/raw/afu_mf/version.map     |   3 +
 drivers/raw/meson.build            |   1 +
 5 files changed, 505 insertions(+)
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
 create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
 create mode 100644 drivers/raw/afu_mf/meson.build
 create mode 100644 drivers/raw/afu_mf/version.map

diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c
new file mode 100644
index 0000000..5be372a
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
@@ -0,0 +1,425 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_rawdev_pmd.h>
+
+#include "afu_mf_rawdev.h"
+
+#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
+
+/* UUIDs handled by this driver; the all-zero entry terminates the list. */
+static const struct rte_afu_uuid afu_uuid_map[] = {
+	{ 0, 0 /* sentinel */ }
+};
+
+/* NULL-terminated list of per-AFU drivers searched by afu_mf_ops_get(). */
+static struct afu_mf_drv *afu_table[] = {
+	NULL
+};
+
+/*
+ * Try to take the per-AFU lock kept in the shared data without blocking.
+ *
+ * Returns 0 when the lock was acquired, -ENODEV when dev or its shared
+ * data is missing, and -EBUSY when the lock is already held.
+ */
+static inline int afu_mf_trylock(struct afu_mf_rawdev *dev)
+{
+	int32_t expected = 0;
+
+	if (!dev || !dev->shared)
+		return -ENODEV;
+
+	/*
+	 * There is no retry loop here, so use the strong compare-exchange
+	 * (weak == 0): a weak one may fail spuriously and report a free
+	 * lock as busy.
+	 */
+	if (!__atomic_compare_exchange_n(&dev->shared->lock, &expected, 1,
+				0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+		return -EBUSY;
+
+	return 0;
+}
+
+/* Release the per-AFU lock; a no-op when dev or its shared data is absent. */
+static inline void afu_mf_unlock(struct afu_mf_rawdev *dev)
+{
+	if (dev && dev->shared)
+		__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE);
+}
+
+/*
+ * rawdev dev_configure callback: hand the configuration to the AFU's
+ * config operation when one is registered.
+ *
+ * Returns -ENODEV when the raw device has no private data, 0 when there is
+ * no config operation, otherwise the value returned by that operation.
+ */
+static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
+	rte_rawdev_obj_t config, size_t config_size)
+{
+	struct afu_mf_rawdev *afu = NULL;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	if (afu->ops == NULL || afu->ops->config == NULL)
+		return 0;
+
+	return afu->ops->config(afu, config, config_size);
+}
+
+/*
+ * rawdev dev_start callback: run the AFU's start operation while holding
+ * the per-AFU lock.
+ *
+ * Returns -ENODEV when the raw device has no private data, the error from
+ * afu_mf_trylock() when the lock cannot be taken, 0 when no start operation
+ * is registered, otherwise the value returned by that operation.
+ */
+static int afu_mf_rawdev_start(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu = NULL;
+	int rc = 0;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	rc = afu_mf_trylock(afu);
+	if (rc != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please start it later");
+		return rc;
+	}
+
+	if (afu->ops != NULL && afu->ops->start != NULL)
+		rc = afu->ops->start(afu);
+
+	afu_mf_unlock(afu);
+
+	return rc;
+}
+
+/*
+ * rawdev dev_stop callback: run the AFU's stop operation while holding the
+ * per-AFU lock. Nothing is done when the raw device has no private data or
+ * the lock cannot be taken; the result of the stop operation is not
+ * reported to the caller.
+ */
+static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu = NULL;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return;
+
+	if (afu_mf_trylock(afu) != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please stop it later");
+		return;
+	}
+
+	if (afu->ops != NULL && afu->ops->stop != NULL)
+		(void)afu->ops->stop(afu);
+
+	afu_mf_unlock(afu);
+}
+
+/*
+ * rawdev dev_close callback: forward to the AFU's close operation when one
+ * is registered.
+ *
+ * Returns -ENODEV when the raw device has no private data, 0 when there is
+ * no close operation, otherwise the value returned by that operation.
+ */
+static int afu_mf_rawdev_close(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu = NULL;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	if (afu->ops == NULL || afu->ops->close == NULL)
+		return 0;
+
+	return afu->ops->close(afu);
+}
+
+/*
+ * rawdev dev_reset callback: run the AFU's reset operation while holding
+ * the per-AFU lock.
+ *
+ * Returns -ENODEV when the raw device has no private data, the error from
+ * afu_mf_trylock() when the lock cannot be taken, 0 when no reset operation
+ * is registered, otherwise the value returned by that operation.
+ */
+static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev)
+{
+	struct afu_mf_rawdev *afu = NULL;
+	int rc = 0;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	rc = afu_mf_trylock(afu);
+	if (rc != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please reset it later");
+		return rc;
+	}
+
+	if (afu->ops != NULL && afu->ops->reset != NULL)
+		rc = afu->ops->reset(afu);
+
+	afu_mf_unlock(afu);
+
+	return rc;
+}
+
+/*
+ * rawdev dev_selftest callback: run the AFU's test operation while holding
+ * the per-AFU lock.
+ *
+ * Returns -ENODEV for an invalid device id, -ENOENT when the raw device has
+ * no private data, the error from afu_mf_trylock() when the lock cannot be
+ * taken, 0 when no test operation is registered, otherwise the value
+ * returned by that operation.
+ */
+static int afu_mf_rawdev_selftest(uint16_t dev_id)
+{
+	struct afu_mf_rawdev *afu = NULL;
+	int rc = 0;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	if (!rte_rawdev_pmd_is_valid_dev(dev_id))
+		return -ENODEV;
+
+	afu = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
+	if (afu == NULL)
+		return -ENOENT;
+
+	rc = afu_mf_trylock(afu);
+	if (rc != 0) {
+		AFU_MF_PMD_WARN("AFU is busy, please test it later");
+		return rc;
+	}
+
+	if (afu->ops != NULL && afu->ops->test != NULL)
+		rc = afu->ops->test(afu);
+
+	afu_mf_unlock(afu);
+
+	return rc;
+}
+
+/*
+ * rawdev dump callback: forward to the AFU's dump operation when one is
+ * registered, passing the output stream through.
+ *
+ * Returns -ENODEV when the raw device has no private data, 0 when there is
+ * no dump operation, otherwise the value returned by that operation.
+ */
+static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f)
+{
+	struct afu_mf_rawdev *afu = NULL;
+
+	AFU_MF_PMD_FUNC_TRACE();
+
+	afu = afu_mf_rawdev_get_priv(rawdev);
+	if (afu == NULL)
+		return -ENODEV;
+
+	if (afu->ops == NULL || afu->ops->dump == NULL)
+		return 0;
+
+	return afu->ops->dump(afu, f);
+}
+
+/*
+ * Raw device operations implemented by this driver. Callbacks that are not
+ * listed here are left NULL by static initialization.
+ */
+static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
+	.dev_configure = afu_mf_rawdev_configure,
+	.dev_start = afu_mf_rawdev_start,
+	.dev_stop = afu_mf_rawdev_stop,
+	.dev_close = afu_mf_rawdev_close,
+	.dev_reset = afu_mf_rawdev_reset,
+	.dump = afu_mf_rawdev_dump,
+	.dev_selftest = afu_mf_rawdev_selftest,
+};
+
+/*
+ * Find or create the memory zone that holds the data shared between
+ * processes for one AFU, and return a pointer to it through data.
+ *
+ * An existing zone is reused untouched; a newly reserved zone gets its
+ * lock cleared. Returns 0 on success, -EINVAL for NULL arguments and
+ * -ENOMEM when the zone cannot be reserved.
+ */
+static int
+afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
+	int socket_id)
+{
+	char zone_name[RTE_MEMZONE_NAMESIZE];
+	const struct rte_memzone *zone = NULL;
+	struct afu_mf_shared *shared = NULL;
+
+	if (name == NULL || data == NULL)
+		return -EINVAL;
+
+	/* name format is afu_?|??:??.? which is unique */
+	snprintf(zone_name, sizeof(zone_name), "%s", name);
+
+	zone = rte_memzone_lookup(zone_name);
+	if (zone != NULL) {
+		/* zone already exists, keep its current lock state */
+		*data = (struct afu_mf_shared *)zone->addr;
+		return 0;
+	}
+
+	zone = rte_memzone_reserve(zone_name, sizeof(struct afu_mf_shared),
+			socket_id, 0);
+	if (zone == NULL) {
+		AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
+			zone_name);
+		return -ENOMEM;
+	}
+
+	/* initialize memory zone on the first time */
+	shared = (struct afu_mf_shared *)zone->addr;
+	shared->lock = 0;
+	*data = shared;
+
+	return 0;
+}
+
+/*
+ * Build the raw device name "afu_<device name>" for afu_dev into name.
+ *
+ * Returns 0 on success, -EINVAL for NULL/zero arguments or when formatting
+ * fails, and -ENAMETOOLONG when the name does not fit into size bytes.
+ */
+static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev, char *name,
+	size_t size)
+{
+	int n = 0;
+
+	if (!afu_dev || !name || !size)
+		return -EINVAL;
+
+	n = snprintf(name, size, "afu_%s", afu_dev->device.name);
+	/* snprintf reports an output error with a negative value */
+	if (n < 0) {
+		AFU_MF_PMD_ERR("Failed to format name of AFU device!");
+		return -EINVAL;
+	}
+	/* compare as size_t so a large size is not truncated to int */
+	if ((size_t)n >= size) {
+		AFU_MF_PMD_ERR("Name of AFU device is too long!");
+		return -ENAMETOOLONG;
+	}
+
+	return 0;
+}
+
+/*
+ * Search afu_table for the driver whose UUID equals afu_id.
+ *
+ * Returns that driver's operation table, or NULL when afu_id is NULL or no
+ * entry matches.
+ */
+static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id)
+{
+	struct afu_mf_drv **slot = NULL;
+
+	if (afu_id == NULL)
+		return NULL;
+
+	for (slot = afu_table; *slot != NULL; slot++) {
+		const struct rte_afu_uuid *id = &(*slot)->uuid;
+
+		if (id->uuid_low == afu_id->uuid_low &&
+			id->uuid_high == afu_id->uuid_high)
+			return (*slot)->ops;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create and initialize the raw device that represents afu_dev.
+ *
+ * The raw device is named by afu_mf_rawdev_name_get(), bound to the AFU
+ * operations matching the device UUID, initialized through the optional
+ * init operation and given its shared data zone. On any failure after the
+ * raw device was allocated it is released again.
+ *
+ * Returns 0 on success, -EINVAL when afu_dev is NULL, -ENOMEM when the raw
+ * device cannot be allocated, -ENODEV when it has no private data,
+ * -ENOTSUP when no driver in afu_table matches the UUID, or the error
+ * returned by name building, the init operation or shared data allocation.
+ */
+static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int socket_id)
+{
+	struct rte_rawdev *rawdev = NULL;
+	struct afu_mf_rawdev *dev = NULL;
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	int ret = 0;
+
+	if (!afu_dev)
+		return -EINVAL;
+
+	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (ret)
+		return ret;
+
+	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
+		name, socket_id);
+
+	/* Allocate device structure */
+	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct afu_mf_rawdev),
+				socket_id);
+	if (!rawdev) {
+		AFU_MF_PMD_ERR("Unable to allocate raw device");
+		return -ENOMEM;
+	}
+
+	rawdev->dev_ops = &afu_mf_rawdev_ops;
+	rawdev->device = &afu_dev->device;
+	rawdev->driver_name = afu_dev->driver->driver.name;
+
+	dev = afu_mf_rawdev_get_priv(rawdev);
+	if (!dev) {
+		/* report the failure instead of returning the initial 0 */
+		ret = -ENODEV;
+		goto cleanup;
+	}
+
+	dev->rawdev = rawdev;
+	dev->port = afu_dev->id.port;
+	dev->addr = afu_dev->mem_resource[0].addr;
+	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
+	if (dev->ops == NULL) {
+		AFU_MF_PMD_ERR("Unsupported AFU device");
+		/* report the failure instead of returning the initial 0 */
+		ret = -ENOTSUP;
+		goto cleanup;
+	}
+
+	if (dev->ops->init) {
+		ret = (*dev->ops->init)(dev);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed to init %s", name);
+			goto cleanup;
+		}
+	}
+
+	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
+	if (ret)
+		goto cleanup;
+
+	return ret;
+
+cleanup:
+	rte_rawdev_pmd_release(rawdev);
+	return ret;
+}
+
+/*
+ * Look up the raw device created for afu_dev by its name and release it.
+ *
+ * Returns -EINVAL when afu_dev is NULL or no raw device has that name, the
+ * error from afu_mf_rawdev_name_get() when the name cannot be built, and 0
+ * otherwise. A failing rte_rawdev_pmd_release() is only logged at DEBUG
+ * level and still results in 0.
+ */
+static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev)
+{
+	struct rte_rawdev *rawdev = NULL;
+	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
+	int ret = 0;
+
+	if (!afu_dev)
+		return -EINVAL;
+
+	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
+	if (ret)
+		return ret;
+
+	AFU_MF_PMD_INFO("Destroy raw device %s", name);
+
+	rawdev = rte_rawdev_pmd_get_named_dev(name);
+	if (!rawdev) {
+		AFU_MF_PMD_ERR("Raw device %s not found", name);
+		return -EINVAL;
+	}
+
+	/* rte_rawdev_close is called by pmd_release */
+	ret = rte_rawdev_pmd_release(rawdev);
+	if (ret)
+		AFU_MF_PMD_DEBUG("Device cleanup failed");
+
+	return 0;
+}
+
+/*
+ * Driver probe callback: create the raw device for afu_dev on the socket
+ * reported by rte_socket_id().
+ */
+static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev)
+{
+	AFU_MF_PMD_FUNC_TRACE();
+	return afu_mf_rawdev_create(afu_dev, rte_socket_id());
+}
+
+/* Driver remove callback: destroy the raw device created for afu_dev. */
+static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev)
+{
+	AFU_MF_PMD_FUNC_TRACE();
+	return afu_mf_rawdev_destroy(afu_dev);
+}
+
+/* AFU driver descriptor: supported UUIDs plus the probe/remove callbacks. */
+static struct rte_afu_driver afu_mf_pmd_drv = {
+	.id_table = afu_uuid_map,
+	.probe = afu_mf_rawdev_probe,
+	.remove = afu_mf_rawdev_remove
+};
+
+/* Register the driver and its log type (default level NOTICE). */
+RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME, afu_mf_pmd_drv);
+RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);
diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h b/drivers/raw/afu_mf/afu_mf_rawdev.h
new file mode 100644
index 0000000..df6715c
--- /dev/null
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __AFU_MF_RAWDEV_H__
+#define __AFU_MF_RAWDEV_H__
+
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <rte_cycles.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+/* Log type of this driver, registered in afu_mf_rawdev.c. */
+extern int afu_mf_pmd_logtype;
+
+/*
+ * Emit one log line at the given level; the calling function's name is
+ * prepended and a newline is appended to the message.
+ */
+#define AFU_MF_PMD_LOG(level, fmt, args...) \
+	rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n", \
+		__func__, ##args)
+
+/* Trace entry into a function at DEBUG level. */
+#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG, ">>")
+
+/* Per-level shorthands for AFU_MF_PMD_LOG(). */
+#define AFU_MF_PMD_DEBUG(fmt, args...) \
+	AFU_MF_PMD_LOG(DEBUG, fmt, ## args)
+#define AFU_MF_PMD_INFO(fmt, args...) \
+	AFU_MF_PMD_LOG(INFO, fmt, ## args)
+#define AFU_MF_PMD_ERR(fmt, args...) \
+	AFU_MF_PMD_LOG(ERR, fmt, ## args)
+#define AFU_MF_PMD_WARN(fmt, args...) \
+	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
+
+struct afu_mf_rawdev;
+
+/*
+ * Operations an AFU driver may provide. The raw device callbacks check each
+ * pointer before calling it, so any member may be left NULL.
+ */
+struct afu_mf_ops {
+	int (*init)(struct afu_mf_rawdev *dev);
+	int (*config)(struct afu_mf_rawdev *dev, void *config,
+		size_t config_size);
+	int (*start)(struct afu_mf_rawdev *dev);
+	int (*stop)(struct afu_mf_rawdev *dev);
+	int (*test)(struct afu_mf_rawdev *dev);
+	int (*close)(struct afu_mf_rawdev *dev);
+	int (*reset)(struct afu_mf_rawdev *dev);
+	int (*dump)(struct afu_mf_rawdev *dev, FILE *f);
+};
+
+/* Associates an AFU UUID with the operations that drive that AFU. */
+struct afu_mf_drv {
+	struct rte_afu_uuid uuid;
+	struct afu_mf_ops *ops;
+};
+
+/* Per-AFU data placed in a memory zone; lock is 0 when free, 1 when held. */
+struct afu_mf_shared {
+	int32_t lock;
+};
+
+/* Private data of an AFU raw device (stored in rte_rawdev dev_private). */
+struct afu_mf_rawdev {
+	struct rte_rawdev *rawdev;  /* point to parent raw device */
+	struct afu_mf_shared *shared;  /* shared data for multi-process */
+	struct afu_mf_ops *ops;  /* device operation functions */
+	int port;  /* index of port the AFU attached */
+	void *addr;  /* base address of AFU registers */
+	void *priv;  /* private driver data */
+};
+
+/* Return the AFU private data of rawdev, or NULL when rawdev is NULL. */
+static inline struct afu_mf_rawdev *
+afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev)
+{
+	if (rawdev == NULL)
+		return NULL;
+
+	return (struct afu_mf_rawdev *)rawdev->dev_private;
+}
+
+#endif /* __AFU_MF_RAWDEV_H__ */
diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
new file mode 100644
index 0000000..80526a2
--- /dev/null
+++ b/drivers/raw/afu_mf/meson.build
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2022 Intel Corporation
+
+deps += ['rawdev', 'bus_pci', 'bus_ifpga']
+sources = files('afu_mf_rawdev.c')
diff --git a/drivers/raw/afu_mf/version.map b/drivers/raw/afu_mf/version.map
new file mode 100644
index 0000000..c2e0723
--- /dev/null
+++ b/drivers/raw/afu_mf/version.map
@@ -0,0 +1,3 @@
+DPDK_22 {
+	local: *;
+};
diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build
index 05e7de1..c3627f7 100644
--- a/drivers/raw/meson.build
+++ b/drivers/raw/meson.build
@@ -6,6 +6,7 @@ if is_windows
 endif
 
 drivers = [
+        'afu_mf',
         'cnxk_bphy',
         'cnxk_gpio',
         'dpaa2_cmdif',
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v5 2/5] raw/afu_mf: add N3000 AFU driver
  2022-05-27  5:36       ` [PATCH v5 0/5] introduce afu_mf raw " Wei Huang
  2022-05-27  5:37         ` [PATCH v5 1/5] drivers/raw: introduce AFU " Wei Huang
@ 2022-05-27  5:37         ` Wei Huang
  2022-06-06  1:38           ` Zhang, Tianfei
  2022-05-27  5:37         ` [PATCH v5 3/5] raw/afu_mf: add HE-LBK " Wei Huang
                           ` (4 subsequent siblings)
  6 siblings, 1 reply; 57+ messages in thread
From: Wei Huang @ 2022-05-27  5:37 UTC (permalink / raw)
  To: dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, rosen.xu, tianfei.zhang, qi.z.zhang, Wei Huang

The N3000 AFU includes NLB0 and DMA modules: NLB0 is used to test the PCI bus
and DMA is used to test local memory.
This driver initializes the modules and reports test results.

Signed-off-by: Wei Huang <wei.huang@intel.com>
---
 drivers/raw/afu_mf/afu_mf_rawdev.c |    4 +
 drivers/raw/afu_mf/afu_mf_rawdev.h |   18 +
 drivers/raw/afu_mf/meson.build     |    4 +-
 drivers/raw/afu_mf/n3000_afu.c     | 2005 ++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
 drivers/raw/afu_mf/rte_pmd_afu.h   |   97 ++
 6 files changed, 2460 insertions(+), 1 deletion(-)
 create mode 100644 drivers/raw/afu_mf/n3000_afu.c
 create mode 100644 drivers/raw/afu_mf/n3000_afu.h
 create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h

diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c
index 5be372a..7c18f3b 100644
--- a/drivers/raw/afu_mf/afu_mf_rawdev.c
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
@@ -17,15 +17,19 @@
 #include <rte_memzone.h>
 #include <rte_rawdev_pmd.h>
 
+#include "rte_pmd_afu.h"
 #include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
 
 #define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
 
 static const struct rte_afu_uuid afu_uuid_map[] = {
+	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
 	{ 0, 0 /* sentinel */ }
 };
 
 static struct afu_mf_drv *afu_table[] = {
+	&n3000_afu_drv,
 	NULL
 };
 
diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h b/drivers/raw/afu_mf/afu_mf_rawdev.h
index df6715c..5a66f6c 100644
--- a/drivers/raw/afu_mf/afu_mf_rawdev.h
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
@@ -30,6 +30,24 @@
 #define AFU_MF_PMD_WARN(fmt, args...) \
 	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
 
+/* Cache-line helpers: one cache line is 64 bytes (shift by 6). */
+#define CLS_TO_SIZE(n)  ((n) << 6)  /* get size of n cache lines */
+#define SIZE_TO_CLS(s)  ((s) >> 6)  /* convert size to number of cache lines */
+#define MHZ(f)  ((f) * 1000000)
+
+/*
+ * Poll *addr until cond becomes true or the timeout expires.
+ *
+ * Each round loads *addr into val, evaluates cond (which is expected to
+ * refer to val) and, when it is false, sleeps invl milliseconds via
+ * rte_delay_ms(). The accumulated wait is compared against timeout in the
+ * same unit. The expression evaluates to 0 when cond holds at the end and
+ * to 1 otherwise.
+ *
+ * NOTE(review): with invl == 0 the wait counter never advances, so the loop
+ * only ends once cond becomes true - confirm callers never pass 0.
+ */
+#define dsm_poll_timeout(addr, val, cond, invl, timeout) \
+({                                                       \
+	uint64_t __wait = 0;                                 \
+	uint64_t __invl = (invl);                            \
+	uint64_t __timeout = (timeout);                      \
+	for (; __wait <= __timeout; __wait += __invl) {      \
+		(val) = *(addr);                                 \
+		if (cond)                                        \
+			break;                                       \
+		rte_delay_ms(__invl);                            \
+	}                                                    \
+	(cond) ? 0 : 1;                                      \
+})
+
 struct afu_mf_rawdev;
 
 struct afu_mf_ops {
diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
index 80526a2..8a989e3 100644
--- a/drivers/raw/afu_mf/meson.build
+++ b/drivers/raw/afu_mf/meson.build
@@ -2,4 +2,6 @@
 # Copyright 2022 Intel Corporation
 
 deps += ['rawdev', 'bus_pci', 'bus_ifpga']
-sources = files('afu_mf_rawdev.c')
+sources = files('afu_mf_rawdev.c', 'n3000_afu.c')
+
+headers = files('rte_pmd_afu.h')
diff --git a/drivers/raw/afu_mf/n3000_afu.c b/drivers/raw/afu_mf/n3000_afu.c
new file mode 100644
index 0000000..19d7c54
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.c
@@ -0,0 +1,2005 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "n3000_afu.h"
+
+/*
+ * Translate the stored NLB configuration (priv->nlb_cfg) into the CSR_CFG
+ * register layout and write it to the NLB register block.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL and -ENOENT when the
+ * device has no private data.
+ */
+static int nlb_afu_config(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_csr_cfg v;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->nlb_cfg;
+
+	/* start from an all-zero register value */
+	v.csr = 0;
+
+	if (cfg->cont)
+		v.cont = 1;
+
+	if (cfg->cache_policy == NLB_WRPUSH_I)
+		v.wrpush_i = 1;
+	else
+		v.wrthru_en = cfg->cache_policy;
+
+	if (cfg->cache_hint == NLB_RDLINE_MIXED)
+		v.rdsel = 3;
+	else
+		v.rdsel = cfg->cache_hint;
+
+	v.mode = cfg->mode;
+	v.chsel = cfg->read_vc;
+	v.wr_chsel = cfg->write_vc;
+	v.wrfence_chsel = cfg->wrfence_vc;
+	/*
+	 * NOTE(review): wrthru_en was already selected above; this assignment
+	 * overwrites it even when cache_policy is NLB_WRPUSH_I - confirm that
+	 * this is intended.
+	 */
+	v.wrthru_en = cfg->cache_policy;
+	v.multicl_len = cfg->multi_cl - 1;
+
+	AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr);
+	rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
+
+	return 0;
+}
+
+/*
+ * Print the result of one NLB run for cl cache lines: read/write counts,
+ * elapsed clock ticks and the derived read/write bandwidth.
+ *
+ * The tick count is taken from the DSM status block with the start (and,
+ * unless in continuous mode, end) overhead subtracted. A clock frequency
+ * of 0 in the configuration is replaced by 200 MHz. Nothing is printed
+ * when dev or its private data is missing.
+ */
+static void nlb_afu_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_dsm_status *stat = NULL;
+	uint64_t ticks = 0;
+	double num = 0, rd_bw = 0, wr_bw = 0;
+
+	if (!dev || !dev->priv)
+		return;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+
+	cfg = &priv->nlb_cfg;
+	stat = priv->nlb_ctx.status_ptr;
+
+	if (cfg->cont)
+		ticks = stat->num_clocks - stat->start_overhead;
+	else
+		ticks = stat->num_clocks -
+			(stat->start_overhead + stat->end_overhead);
+
+	if (cfg->freq_mhz == 0)
+		cfg->freq_mhz = 200;
+
+	/* no elapsed ticks: report zero bandwidth instead of dividing by 0 */
+	if (ticks != 0) {
+		num = (double)stat->num_reads;
+		rd_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+		num = (double)stat->num_writes;
+		wr_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+	}
+
+	printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
+		"Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
+	printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n",
+		cl, stat->num_reads, stat->num_writes, ticks,
+		rd_bw / 1e9, wr_bw / 1e9);
+}
+
+/*
+ * Run the NLB loopback test over the configured range of cache lines.
+ *
+ * The DSM, source and destination addresses and the configuration word are
+ * programmed once. Then, for every cache-line count from cfg->begin to
+ * cfg->end in steps of cfg->multi_cl, the destination and DSM buffers are
+ * cleared, the AFU is reset and started, completion is awaited through
+ * dsm_poll_timeout(), a report is printed and the destination buffer is
+ * compared with the generated source pattern.
+ *
+ * Returns -EINVAL when dev is NULL, -ENOENT when it has no private data,
+ * the error from nlb_afu_config(), the non-zero dsm_poll_timeout() result
+ * when polling times out, and 0 otherwise.
+ *
+ * NOTE(review): a data mismatch is only logged; it does not change the
+ * return value - confirm this is intended.
+ * NOTE(review): the loop never advances when cfg->multi_cl is 0 - confirm
+ * the configuration path rejects that value.
+ */
+static int nlb_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct nlb_afu_ctx *ctx = NULL;
+	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
+	struct nlb_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	ctx = &priv->nlb_ctx;
+	cfg = &priv->nlb_cfg;
+
+	/* initialize registers */
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
+
+	/* write 0 and then the reset bit to the control register */
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	/* buffer addresses are programmed in units of cache lines */
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
+
+	ret = nlb_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CLS_TO_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		rte_write32(cl, ctx->addr + CSR_NUM_LINES);
+
+		rte_delay_us(10);
+
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			/* continuous mode: run for cfg->timeout seconds, then force completion */
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+		} else {
+			/* wait for the completion flag first, then set force_completion */
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		nlb_afu_report(dev, cl);
+
+		/* wait up to 100 x 1 ms for CSR_STATUS1 to read back as 0 */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		/* destination must hold the same counting pattern as the source */
+		ptr = (uint32_t *)ctx->dest_ptr;
+		j = CLS_TO_SIZE(cl) >> 2;
+		for (i = 0; i < j; i++) {
+			if (*ptr++ != i) {
+				AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+				break;
+			}
+		}
+	}
+
+end:
+	return ret;
+}
+
+/*
+ * Free every buffer owned by the DMA test context and clear the pointers so
+ * the function can safely be called again. Does nothing when ctx is NULL.
+ */
+static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
+{
+	int idx;
+
+	if (ctx == NULL)
+		return;
+
+	for (idx = 0; idx < NUM_DMA_BUF; idx++) {
+		rte_free(ctx->dma_buf[idx]);
+		ctx->dma_buf[idx] = NULL;
+	}
+
+	rte_free(ctx->data_buf);
+	rte_free(ctx->ref_buf);
+	ctx->data_buf = NULL;
+	ctx->ref_buf = NULL;
+}
+
+/*
+ * Allocate the buffers used by the DMA test: NUM_DMA_BUF zeroed DMA buffers
+ * of cfg->size bytes (with their IOVA recorded) plus page-aligned data and
+ * reference buffers of cfg->length bytes.
+ *
+ * Returns 0 on success and -EINVAL for NULL arguments. On any allocation
+ * or address translation failure everything allocated so far is freed and
+ * -ENOMEM is returned.
+ */
+static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
+	struct rte_pmd_afu_dma_cfg *cfg)
+{
+	size_t page_sz = sysconf(_SC_PAGE_SIZE);
+	int idx;
+
+	if (ctx == NULL || cfg == NULL)
+		return -EINVAL;
+
+	for (idx = 0; idx < NUM_DMA_BUF; idx++) {
+		ctx->dma_buf[idx] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
+			TEST_MEM_ALIGN);
+		if (ctx->dma_buf[idx] == NULL)
+			goto nomem;
+
+		ctx->dma_iova[idx] = rte_malloc_virt2iova(ctx->dma_buf[idx]);
+		if (ctx->dma_iova[idx] == RTE_BAD_IOVA)
+			goto nomem;
+	}
+
+	ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz);
+	if (ctx->data_buf == NULL)
+		goto nomem;
+
+	ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz);
+	if (ctx->ref_buf == NULL)
+		goto nomem;
+
+	return 0;
+
+nomem:
+	dma_afu_buf_free(ctx);
+	return -ENOMEM;
+}
+
+/*
+ * Fill the reference buffer with test data and copy it to the data buffer.
+ *
+ * A non-zero ctx->pattern is replicated over size bytes; otherwise the
+ * buffer is filled, 32 bits at a time, from rand() seeded with 99. Does
+ * nothing when ctx is NULL or size is 0.
+ */
+static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
+{
+	int *words = NULL;
+	size_t n_words = 0;
+	size_t k = 0;
+
+	if (ctx == NULL || size == 0)
+		return;
+
+	if (ctx->pattern) {
+		memset(ctx->ref_buf, ctx->pattern, size);
+	} else {
+		words = (int *)ctx->ref_buf;
+		n_words = size >> 2;
+		srand(99);
+		for (k = 0; k < n_words; k++)
+			words[k] = rand();
+	}
+
+	rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
+}
+
+/*
+ * Compare size bytes of the data buffer with the reference buffer and print
+ * the outcome.
+ *
+ * In verbose mode each differing byte is listed until the mismatch count
+ * reaches ERR_CHECK_LIMIT, followed by a summary line.
+ *
+ * Returns 0 when the buffers are equal, -1 when they differ and -EINVAL
+ * when ctx is NULL or size is 0.
+ */
+static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
+{
+	uint8_t *src = NULL;
+	uint8_t *dst = NULL;
+	size_t i = 0;
+	int n = 0;
+
+	if (!ctx || !size)
+		return -EINVAL;
+
+	src = (uint8_t *)ctx->ref_buf;
+	dst = (uint8_t *)ctx->data_buf;
+
+	if (memcmp(src, dst, size)) {
+		printf("Transfer is corrupted\n");
+		if (ctx->verbose) {
+			for (i = 0; i < size; i++) {
+				if (*src != *dst) {
+					/* stop before printing once the limit is reached */
+					if (++n >= ERR_CHECK_LIMIT)
+						break;
+					printf("Mismatch at 0x%zx, "
+						"Expected %02x  Actual %02x\n",
+						i, *src, *dst);
+				}
+				src++;
+				dst++;
+			}
+			if (n < ERR_CHECK_LIMIT) {
+				printf("Found %d error bytes\n", n);
+			} else {
+				printf("......\n");
+				printf("Found more than %d error bytes\n", n);
+			}
+		}
+		return -1;
+	}
+
+	printf("Transfer is verified\n");
+	return 0;
+}
+
+/*
+ * Copy bytes from host memory to device memory with 64-bit MMIO writes, in
+ * ascending address order. Nothing is written unless both the device
+ * address and the byte count are 8-byte aligned.
+ */
+static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+	const uint64_t n_qwords = bytes / sizeof(uint64_t);
+	uint64_t idx = 0;
+
+	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
+		!IS_ALIGNED_QWORD((uint64_t)bytes))
+		return;
+
+	for (idx = 0; idx < n_qwords; idx++)
+		rte_write64(host_addr[idx], &dev_addr[idx]);
+}
+
+/*
+ * Copy bytes from device memory to host memory with 64-bit MMIO reads, in
+ * ascending address order. Nothing is read unless both the device address
+ * and the byte count are 8-byte aligned.
+ */
+static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
+{
+	const uint64_t n_qwords = bytes / sizeof(uint64_t);
+	uint64_t idx = 0;
+
+	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
+		!IS_ALIGNED_QWORD((uint64_t)bytes))
+		return;
+
+	for (idx = 0; idx < n_qwords; idx++)
+		host_addr[idx] = rte_read64(&dev_addr[idx]);
+}
+
+/*
+ * Make the ASE window point at the page containing addr. The control
+ * register is written only when that page differs from the one cached in
+ * ctx->cur_ase_page. Does nothing when ctx is NULL.
+ */
+static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
+{
+	uint64_t page = 0;
+
+	if (ctx == NULL)
+		return;
+
+	page = addr & ~DMA_ASE_WINDOW_MASK;
+	if (page == ctx->cur_ase_page)
+		return;
+
+	rte_write64(page, ctx->ase_ctrl_addr);
+	ctx->cur_ase_page = page;
+}
+
+/*
+ * Write fewer than QWORD_BYTES bytes to an arbitrary device address with a
+ * read-modify-write of the aligned 64-bit word that contains it.
+ *
+ * host_addr is a host virtual address carried in a uint64_t. Returns 0 on
+ * success (including count == 0) and -EINVAL when ctx is NULL or count is
+ * QWORD_BYTES or more.
+ *
+ * NOTE(review): assumes (dev_addr % QWORD_BYTES) + count <= QWORD_BYTES so
+ * the copy stays inside val; only count is checked here - confirm callers
+ * guarantee this.
+ */
+static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+	uint64_t host_addr, uint32_t count)
+{
+	uint64_t dev_aligned_addr = 0;
+	uint64_t shift = 0;
+	uint64_t val = 0;
+	uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr,
+		dev_addr, count);
+
+	if (!ctx || (count >= QWORD_BYTES))
+		return -EINVAL;
+
+	if (!count)
+		return 0;
+
+	switch_ase_page(ctx, dev_addr);
+
+	/* read the aligned qword and overlay the new bytes at their offset */
+	shift = dev_addr % QWORD_BYTES;
+	dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
+	val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
+	rte_memcpy(((char *)(&val)) + shift, (void *)addr, count);
+
+	/* write back to device */
+	rte_write64(val, ctx->ase_data_addr + dev_aligned_addr);
+
+	return 0;
+}
+
+/*
+ * Copy host memory to device memory through the ASE window using 32-bit and
+ * 64-bit MMIO writes.
+ *
+ * *dst_ptr is the device address (must be 4-byte aligned), *src_ptr the
+ * host virtual address and *count the number of bytes. A leading DWORD is
+ * written when needed to reach 8-byte alignment, then QWORD blocks are
+ * written window by window, then one trailing DWORD if at least 4 bytes
+ * remain. On return the three in/out values describe what is left to copy
+ * (fewer than DWORD_BYTES bytes).
+ *
+ * Returns 0 on success and -EINVAL when ctx is NULL or the destination is
+ * not DWORD aligned.
+ */
+static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+	uint64_t *src_ptr, uint64_t *count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t align_bytes = *count;
+	uint64_t offset = 0;
+	uint64_t left_in_page = 0;
+	uint64_t size_to_copy = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		align_bytes);
+
+	if (!ctx || !IS_ALIGNED_DWORD(dst))
+		return -EINVAL;
+
+	if (align_bytes < DWORD_BYTES)
+		return 0;
+
+	if (!IS_ALIGNED_QWORD(dst)) {
+		/* Write out a single DWORD to get QWORD aligned */
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+
+		rte_write32(*(uint32_t *)(uintptr_t)src,
+			ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/*
+	 * No early return when nothing is left: the steps below are no-ops
+	 * then, and the in/out cursors must still be updated at the end.
+	 */
+
+	/* Write out blocks of 64-bit values */
+	while (align_bytes >= QWORD_BYTES) {
+		/*
+		 * Room left in the window that contains dst; recomputed on
+		 * every pass so that each new window starts with a full budget.
+		 */
+		left_in_page = DMA_ASE_WINDOW - (dst & DMA_ASE_WINDOW_MASK);
+		size_to_copy =
+			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+		if (size_to_copy < QWORD_BYTES)
+			break;
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+		blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
+			(uint64_t *)(uintptr_t)src, size_to_copy);
+		src += size_to_copy;
+		dst += size_to_copy;
+		align_bytes -= size_to_copy;
+	}
+
+	if (align_bytes >= DWORD_BYTES) {
+		/* Write out remaining DWORD */
+		switch_ase_page(ctx, dst);
+		offset = dst & DMA_ASE_WINDOW_MASK;
+		rte_write32(*(uint32_t *)(uintptr_t)src,
+			ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	*src_ptr = src;
+	*dst_ptr = dst;
+	*count = align_bytes;
+
+	return 0;
+}
+
+/*
+ * Copy count bytes from host memory (*src_ptr, a virtual address) to device
+ * memory (*dst_ptr) through the ASE window.
+ *
+ * The transfer is done in three steps: a byte-granular head written with
+ * ase_write_unaligned() when the destination is not DWORD aligned, the
+ * aligned middle written by ase_write(), and the remaining tail written
+ * with ase_write_unaligned() again. On success *dst_ptr and *src_ptr are
+ * advanced past the bytes handled.
+ *
+ * Returns 0 on success or the error of the failing helper.
+ */
+static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
+	uint64_t *src_ptr, uint64_t count)
+{
+	uint64_t dst = *dst_ptr;
+	uint64_t src = *src_ptr;
+	uint64_t count_left = count;
+	uint64_t unaligned_size = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
+		count);
+
+	/* aligns address to 8 byte using dst masking method */
+	if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) {
+		unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
+		if (unaligned_size > count_left)
+			unaligned_size = count_left;
+		ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
+		if (ret)
+			return ret;
+		count_left -= unaligned_size;
+		src += unaligned_size;
+		dst += unaligned_size;
+	}
+
+	/* Handles 8/4 byte MMIO transfer */
+	ret = ase_write(ctx, &dst, &src, &count_left);
+	if (ret)
+		return ret;
+
+	/* Left over unaligned bytes transferred using dst masking method */
+	unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
+	if (unaligned_size > count_left)
+		unaligned_size = count_left;
+
+	ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
+	if (ret)
+		return ret;
+
+	/* count_left is not read again after this point */
+	count_left -= unaligned_size;
+	*dst_ptr = dst + unaligned_size;
+	*src_ptr = src + unaligned_size;
+
+	return 0;
+}
+
+/*
+ * Read fewer than 8 bytes from FPGA memory into host memory.
+ *
+ * One aligned 64-bit word containing dev_addr is read through the ASE data
+ * window and the requested bytes are extracted from it, so the request must
+ * not extend past the end of that word.
+ *
+ * @param ctx        DMA AFU context
+ * @param dev_addr   FPGA address to read from
+ * @param host_addr  host virtual address (as integer) to store the bytes
+ * @param count      number of bytes, must be less than QWORD_BYTES
+ * @return 0 on success (including count == 0), -EINVAL on bad arguments
+ */
+static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
+	uint64_t host_addr, uint32_t count)
+{
+	uint64_t dev_aligned_addr = 0;
+	uint64_t shift = 0;
+	uint64_t val = 0;
+	uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr,
+		dev_addr, count);
+
+	if (!ctx || (count >= QWORD_BYTES))
+		return -EINVAL;
+
+	if (!count)
+		return 0;
+
+	shift = dev_addr % QWORD_BYTES;
+	/* copying past the end of 'val' would read unrelated stack bytes */
+	if ((shift + count) > QWORD_BYTES)
+		return -EINVAL;
+
+	switch_ase_page(ctx, dev_addr);
+
+	dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
+	val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
+	rte_memcpy((void *)addr, ((char *)(&val)) + shift, count);
+
+	return 0;
+}
+
+/*
+ * Read DWORD/QWORD sized data from FPGA memory through the ASE window.
+ *
+ * The source address must be 4-byte aligned. A leading DWORD is read if
+ * needed to reach 8-byte alignment, then 64-bit blocks are read window by
+ * window, then one trailing DWORD if at least 4 bytes remain.
+ *
+ * @param ctx      DMA AFU context
+ * @param src_ptr  in/out FPGA address
+ * @param dst_ptr  in/out host virtual address (as integer)
+ * @param count    in/out number of bytes; on return the bytes not handled
+ * @return 0 on success, -EINVAL on bad arguments
+ */
+static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+	uint64_t *dst_ptr, uint64_t *count)
+{
+	uint64_t src = *src_ptr;
+	uint64_t dst = *dst_ptr;
+	uint64_t align_bytes = *count;
+	uint64_t offset = 0;
+	uint64_t left_in_page = 0;
+	uint64_t size_to_copy = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")", dst, src,
+		align_bytes);
+
+	if (!ctx || !IS_ALIGNED_DWORD(src))
+		return -EINVAL;
+
+	/* nothing this helper can move; outputs are already correct */
+	if (align_bytes < DWORD_BYTES)
+		return 0;
+
+	if (!IS_ALIGNED_QWORD(src)) {
+		/* Read a single DWORD to get QWORD aligned */
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		*(uint32_t *)(uintptr_t)dst =
+			rte_read32(ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	/*
+	 * No early return here: when the DWORD above consumed everything the
+	 * code below is skipped naturally and the updated address/count values
+	 * are still reported back to the caller.
+	 */
+
+	/* Read blocks of 64-bit values */
+	while (align_bytes >= QWORD_BYTES) {
+		/* room left in the current window, recomputed for every pass */
+		offset = src & DMA_ASE_WINDOW_MASK;
+		left_in_page = DMA_ASE_WINDOW - offset;
+		size_to_copy =
+			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
+		if (size_to_copy < QWORD_BYTES)
+			break;
+		switch_ase_page(ctx, src);
+		blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
+			(uint64_t *)(uintptr_t)dst, size_to_copy);
+		src += size_to_copy;
+		dst += size_to_copy;
+		align_bytes -= size_to_copy;
+	}
+
+	if (align_bytes >= DWORD_BYTES) {
+		/* Read remaining DWORD */
+		switch_ase_page(ctx, src);
+		offset = src & DMA_ASE_WINDOW_MASK;
+		*(uint32_t *)(uintptr_t)dst =
+			rte_read32(ctx->ase_data_addr + offset);
+		src += DWORD_BYTES;
+		dst += DWORD_BYTES;
+		align_bytes -= DWORD_BYTES;
+	}
+
+	*src_ptr = src;
+	*dst_ptr = dst;
+	*count = align_bytes;
+
+	return 0;
+}
+
+/*
+ * Copy 'count' bytes from FPGA memory to host memory through the ASE window.
+ *
+ * The transfer is done in three steps: an unaligned head that brings the
+ * FPGA address up to an 8-byte boundary, a run of 8/4-byte MMIO reads, and
+ * an unaligned tail. On success *src_ptr and *dst_ptr are advanced past the
+ * bytes handled here.
+ *
+ * Returns 0 on success or the error returned by the ASE read helpers.
+ */
+static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
+	uint64_t *dst_ptr, uint64_t count)
+{
+	uint64_t fpga_addr = *src_ptr;
+	uint64_t host_addr = *dst_ptr;
+	uint64_t remaining = count;
+	uint64_t partial = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")",
+		fpga_addr, host_addr, count);
+
+	/* head: masked read up to the next 8-byte boundary */
+	if (!(IS_ALIGNED_DWORD(fpga_addr) || IS_ALIGNED_QWORD(fpga_addr))) {
+		partial = QWORD_BYTES - (fpga_addr % QWORD_BYTES);
+		partial = (partial > remaining) ? remaining : partial;
+		ret = ase_read_unaligned(ctx, fpga_addr, host_addr, partial);
+		if (ret)
+			return ret;
+		remaining -= partial;
+		host_addr += partial;
+		fpga_addr += partial;
+	}
+
+	/* body: 8/4 byte MMIO transfer, helper updates all three values */
+	ret = ase_read(ctx, &fpga_addr, &host_addr, &remaining);
+	if (ret)
+		return ret;
+
+	/* tail: whatever is left comes in as one masked read */
+	partial = QWORD_BYTES - (fpga_addr % QWORD_BYTES);
+	partial = (partial > remaining) ? remaining : partial;
+	ret = ase_read_unaligned(ctx, fpga_addr, host_addr, partial);
+	if (ret)
+		return ret;
+
+	*dst_ptr = host_addr + partial;
+	*src_ptr = fpga_addr + partial;
+
+	return 0;
+}
+
+/* Acknowledge a DMA interrupt: write a status word with only IRQ set. */
+static void clear_interrupt(struct dma_afu_ctx *ctx)
+{
+	msgdma_status irq_ack;
+
+	if (ctx == NULL)
+		return;
+
+	/* zero the whole register image first so no other bit is written */
+	irq_ack.csr = 0;
+	irq_ack.irq = 1;
+	rte_write32(irq_ack.csr, CSR_STATUS(ctx->csr_addr));
+}
+
+/*
+ * Wait for the DMA interrupt delivered through the context's eventfd.
+ *
+ * Polls the eventfd for up to DMA_TIMEOUT_MSEC and consumes the event
+ * counter. The interrupt is cleared in the status register on every path
+ * that got past argument validation.
+ *
+ * @return 0 on success, -EINVAL on bad arguments, -EFAULT if poll() failed,
+ *         -ETIMEDOUT on timeout, -EIO if the eventfd could not be read
+ */
+static int poll_interrupt(struct dma_afu_ctx *ctx)
+{
+	struct pollfd pfd = {0};
+	uint64_t count = 0;
+	ssize_t bytes_read = 0;
+	int poll_ret = 0;
+	int ret = 0;
+
+	if (!ctx || (ctx->event_fd < 0))
+		return -EINVAL;
+
+	pfd.fd = ctx->event_fd;
+	pfd.events = POLLIN;
+	poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
+	if (poll_ret < 0) {
+		AFU_MF_PMD_ERR("Error %s", strerror(errno));
+		ret = -EFAULT;
+	} else if (poll_ret == 0) {
+		AFU_MF_PMD_ERR("Timeout");
+		ret = -ETIMEDOUT;
+	} else {
+		bytes_read = read(pfd.fd, &count, sizeof(count));
+		if (bytes_read > 0) {
+			if (ctx->verbose)
+				AFU_MF_PMD_DEBUG("Successful, ret %d, cnt %"PRIu64,
+					poll_ret, count);
+			ret = 0;
+		} else {
+			/* read() reports failure as a negative value, EOF as 0 */
+			AFU_MF_PMD_ERR("Failed %s", bytes_read < 0 ?
+				strerror(errno) : "zero bytes read");
+			ret = -EIO;
+		}
+	}
+
+	clear_interrupt(ctx);
+	return ret;
+}
+
+/*
+ * Post one descriptor to the DMA engine.
+ *
+ * Spins on the status register until the descriptor buffer is no longer
+ * reported full, then copies the descriptor to the descriptor port with
+ * 64-bit writes. There is no upper bound on the wait; a debug message is
+ * emitted periodically while spinning.
+ */
+static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc)
+{
+	msgdma_status status;
+	uint64_t spins = 0;
+
+	if (!ctx)
+		return;
+
+	if (ctx->verbose) {
+		AFU_MF_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
+			desc->rd_address_ext, desc->rd_address);
+		AFU_MF_PMD_DEBUG("descriptor.wr_address = 0x%x%08x",
+			desc->wr_address_ext, desc->wr_address);
+		AFU_MF_PMD_DEBUG("descriptor.len = %u", desc->len);
+		AFU_MF_PMD_DEBUG("descriptor.wr_burst_count = %u",
+			desc->wr_burst_count);
+		AFU_MF_PMD_DEBUG("descriptor.rd_burst_count = %u",
+			desc->rd_burst_count);
+		AFU_MF_PMD_DEBUG("descriptor.wr_stride %u", desc->wr_stride);
+		AFU_MF_PMD_DEBUG("descriptor.rd_stride %u", desc->rd_stride);
+	}
+
+	for (;;) {
+		status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
+		if (spins++ > 100000000) {
+			AFU_MF_PMD_DEBUG("DMA queue full retry");
+			spins = 0;
+		}
+		if (!status.desc_buf_full)
+			break;
+	}
+
+	blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
+		sizeof(*desc));
+}
+
+/* Fill the address, length and burst fields of one descriptor segment. */
+static void dma_desc_set_xfer(msgdma_ext_desc *desc, uint64_t dst,
+	uint64_t src, int len, int burst)
+{
+	desc->rd_address = src & DMA_MASK_32_BIT;
+	desc->wr_address = dst & DMA_MASK_32_BIT;
+	desc->len = len;
+	desc->wr_burst_count = burst;
+	desc->rd_burst_count = burst;
+	desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
+	desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
+}
+
+/*
+ * Build and post the descriptor(s) for one DMA transfer.
+ *
+ * FPGA-to-FPGA transfers use a single descriptor. Transfers involving the
+ * host are split into up to three segments around the 4CL (CCIP_ALIGN_BYTES)
+ * boundaries of the host address: a short head, a 4CL-aligned body and a
+ * short tail. When intr_en is set, the interrupt is requested only on the
+ * last segment that is actually posted.
+ *
+ * @param ctx           DMA AFU context
+ * @param dst           destination address
+ * @param src           source address
+ * @param count         number of bytes
+ * @param is_last_desc  non-zero disables early done on the descriptor(s)
+ * @param type          transfer direction
+ * @param intr_en       non-zero to raise an interrupt on completion
+ * @return 0 on success, -EINVAL if ctx is NULL or src/dst/count are not
+ *         DMA aligned
+ */
+static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	int count, int is_last_desc, fpga_dma_type type, int intr_en)
+{
+	msgdma_ext_desc *desc = NULL;
+	int alignment_offset = 0;
+	int segment_size = 0;
+	int irq = intr_en ? 1 : 0;
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* src, dst and count must be 64-byte aligned */
+	if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
+		!IS_DMA_ALIGNED(count))
+		return -EINVAL;
+	memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
+
+	/* these fields are fixed for all DMA transfers */
+	desc = ctx->desc_buf;
+	desc->seq_num = 0;
+	desc->wr_stride = 1;
+	desc->rd_stride = 1;
+	desc->control.go = 1;
+	desc->control.transfer_irq_en = irq;
+	desc->control.early_done_en = is_last_desc ? 0 : 1;
+
+	if (type == FPGA_TO_FPGA) {
+		dma_desc_set_xfer(desc, dst, src, count, 4);
+		send_descriptor(ctx, desc);
+		return 0;
+	}
+
+	/* check CCIP (host) address is aligned to 4CL (256B) */
+	alignment_offset = (type == HOST_TO_FPGA)
+		? (src % CCIP_ALIGN_BYTES) : (dst % CCIP_ALIGN_BYTES);
+
+	/* performing a short transfer to get aligned */
+	if (alignment_offset != 0) {
+		segment_size = CCIP_ALIGN_BYTES - alignment_offset;
+		/* count isn't large enough to hit next 4CL boundary */
+		if (segment_size >= count)
+			segment_size = count;
+		/* interrupt only when nothing follows this segment */
+		desc->control.transfer_irq_en =
+			(segment_size == count) ? irq : 0;
+		dma_desc_set_xfer(desc, dst, src, segment_size, 1);
+		send_descriptor(ctx, desc);
+		src += segment_size;
+		dst += segment_size;
+		count -= segment_size;
+	}
+
+	/* at this point we are 4CL (256 byte) aligned */
+	if (count >= CCIP_ALIGN_BYTES) {
+		segment_size = count - (count % CCIP_ALIGN_BYTES);
+		/*
+		 * The head segment may have switched the interrupt off; turn
+		 * it back on when this segment ends the transfer, otherwise
+		 * the caller would wait for an interrupt that never comes.
+		 */
+		desc->control.transfer_irq_en =
+			(segment_size == count) ? irq : 0;
+		dma_desc_set_xfer(desc, dst, src, segment_size, 4);
+		send_descriptor(ctx, desc);
+		src += segment_size;
+		dst += segment_size;
+		count -= segment_size;
+	}
+
+	/* post short transfer to handle the remainder */
+	if (count > 0) {
+		desc->control.transfer_irq_en = irq;
+		dma_desc_set_xfer(desc, dst, src, count, 1);
+		send_descriptor(ctx, desc);
+	}
+
+	return 0;
+}
+
+/*
+ * Queue the "magic" write-fence transfer.
+ *
+ * Clears the host magic buffer and posts a 64-byte FPGA-to-host DMA from
+ * DMA_WF_MAGIC_ROM into it with the completion interrupt enabled.
+ *
+ * @return 0 on success, -EINVAL if the context or its magic buffer is
+ *         missing, otherwise the result of do_dma()
+ */
+static int issue_magic(struct dma_afu_ctx *ctx)
+{
+	/* guard before the buffer is written below */
+	if (!ctx || !ctx->magic_buf)
+		return -EINVAL;
+
+	*(ctx->magic_buf) = 0ULL;
+	return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
+		DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
+}
+
+/*
+ * Wait for a previously issued magic transfer to land in host memory.
+ *
+ * Waits for the completion interrupt, then spins briefly until the magic
+ * buffer holds DMA_WF_MAGIC. The buffer is cleared again before returning.
+ * A failed interrupt wait is already logged by poll_interrupt(); this
+ * function has no way to report it and continues with the bounded spin.
+ */
+static void wait_magic(struct dma_afu_ctx *ctx)
+{
+	/* the buffer is written by the device: force a real load each pass */
+	volatile uint64_t *magic = NULL;
+	int magic_timeout = 0;
+
+	if (!ctx || !ctx->magic_buf)
+		return;
+
+	magic = ctx->magic_buf;
+
+	(void)poll_interrupt(ctx);
+	while (*magic != DMA_WF_MAGIC) {
+		if (magic_timeout++ > 1000) {
+			AFU_MF_PMD_ERR("DMA magic operation timeout");
+			magic_timeout = 0;
+			break;
+		}
+	}
+	*magic = 0ULL;
+}
+
+/*
+ * Send one dma_buf_size chunk from host to FPGA through a bounce buffer.
+ *
+ * 'chunk' selects both the offset into src/dst and, modulo NUM_DMA_BUF, the
+ * bounce buffer. An interrupt is requested on the last chunk of every half
+ * of the buffer ring and on the final chunk; before such a chunk is posted,
+ * a previously requested interrupt (tracked in *intr_issued) is waited for.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments, or the error from
+ * poll_interrupt()/do_dma().
+ */
+static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	uint64_t chunk, int is_last_chunk, int *intr_issued)
+{
+	uint64_t slot = 0;
+	uint64_t byte_off = 0;
+	int want_irq = 0;
+	int ret = 0;
+
+	if (!ctx || !intr_issued)
+		return -EINVAL;
+
+	byte_off = chunk * ctx->dma_buf_size;
+	src += byte_off;
+	dst += byte_off;
+
+	if (is_last_chunk || ((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1))) {
+		/* drain the outstanding interrupt before asking for another */
+		if (*intr_issued) {
+			ret = poll_interrupt(ctx);
+			if (ret)
+				return ret;
+		}
+		want_irq = 1;
+	}
+
+	slot = chunk % NUM_DMA_BUF;
+	rte_memcpy(ctx->dma_buf[slot], (void *)(uintptr_t)src,
+		ctx->dma_buf_size);
+	ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[slot]),
+			ctx->dma_buf_size, 0, HOST_TO_FPGA, want_irq);
+	if (want_irq)
+		*intr_issued = 1;
+
+	return ret;
+}
+
+/*
+ * Transfer 'count' bytes from host memory (virtual address 'src') to FPGA
+ * memory (address 'dst').
+ *
+ * Stages, in order:
+ *  1. if dst is not DMA aligned, the bytes up to the next DMA_ALIGN_BYTES
+ *     boundary (or the whole transfer when it is shorter than
+ *     DMA_ALIGN_BYTES) are written through the ASE path;
+ *  2. whole dma_buf_size chunks are sent with dma_tx_buf();
+ *  3. the remaining multiple of DMA_ALIGN_BYTES is copied into dma_buf[0]
+ *     and sent with a single interrupt-enabled DMA;
+ *  4. the final bytes go through the ASE path again.
+ *
+ * Returns 0 on success, -EINVAL if ctx is NULL, or the first error reported
+ * by a helper.
+ */
+static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t aligned_addr = 0;
+	uint64_t align_bytes = 0;
+	uint64_t dma_chunks = 0;
+	uint64_t dma_tx_bytes = 0;
+	uint64_t offset = 0;
+	int issued_intr = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* stage 1: bring dst up to a DMA boundary via ASE */
+	if (!IS_DMA_ALIGNED(dst)) {
+		if (count_left < DMA_ALIGN_BYTES)
+			return ase_host_to_fpga(ctx, &dst, &src, count_left);
+
+		aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
+			* DMA_ALIGN_BYTES;
+		align_bytes = aligned_addr - dst;
+		/* dst and src are advanced by the helper */
+		ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
+		if (ret)
+			return ret;
+		count_left = count_left - align_bytes;
+	}
+
+	if (count_left) {
+		/* stage 2: full-size chunks; 'offset' is the bytes they cover */
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = dma_tx_buf(ctx, dst, src, i,
+				i == (dma_chunks - 1), &issued_intr);
+			if (ret)
+				return ret;
+		}
+
+		/* wait for the last interrupt requested by dma_tx_buf() */
+		if (issued_intr) {
+			ret = poll_interrupt(ctx);
+			if (ret)
+				return ret;
+		}
+
+		if (count_left) {
+			/* stage 3: DMA-aligned part of the remainder */
+			i = count_left / DMA_ALIGN_BYTES;
+			if (i > 0) {
+				dma_tx_bytes = i * DMA_ALIGN_BYTES;
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+					dma_tx_bytes);
+				rte_memcpy(ctx->dma_buf[0],
+					(void *)(uintptr_t)(src + offset),
+					dma_tx_bytes);
+				ret = do_dma(ctx, dst + offset,
+					DMA_HOST_ADDR(ctx->dma_iova[0]),
+					dma_tx_bytes, 1, HOST_TO_FPGA, 1);
+				if (ret)
+					return ret;
+				ret = poll_interrupt(ctx);
+				if (ret)
+					return ret;
+			}
+
+			/* stage 4: bytes too few for DMA go through ASE */
+			count_left -= dma_tx_bytes;
+			if (count_left) {
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+					count_left);
+				dst += offset + dma_tx_bytes;
+				src += offset + dma_tx_bytes;
+				ret = ase_host_to_fpga(ctx, &dst, &src,
+					count_left);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Receive one dma_buf_size chunk from FPGA into a bounce buffer and, when
+ * enough chunks are outstanding, copy completed ones out to 'dst'.
+ *
+ * 'chunk' selects the source offset and, modulo NUM_DMA_BUF, the bounce
+ * buffer. *rx_count is the index of the next chunk to be copied from the
+ * bounce buffers to dst; *wf_issued records whether a magic (write fence)
+ * transfer is outstanding. Both are updated here.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments, or the error from
+ * do_dma()/issue_magic().
+ */
+static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
+{
+	uint64_t i = chunk % NUM_DMA_BUF;
+	/* NOTE(review): rx_count is dereferenced without a NULL check */
+	uint64_t n = *rx_count;
+	uint64_t num_pending = 0;
+	int ret = 0;
+
+	if (!ctx || !wf_issued)
+		return -EINVAL;
+
+	/* post the read of this chunk, no interrupt requested */
+	ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
+		src + chunk * ctx->dma_buf_size,
+		ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
+	if (ret)
+		return ret;
+
+	/* chunks posted but not yet copied out, including this one */
+	num_pending = chunk - n + 1;
+	if (num_pending == HALF_DMA_BUF) {
+		ret = issue_magic(ctx);
+		if (ret) {
+			AFU_MF_PMD_DEBUG("Magic issue failed");
+			return ret;
+		}
+		*wf_issued = 1;
+	}
+
+	if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
+		if (*wf_issued) {
+			/* fence completed: copy out HALF_DMA_BUF buffers */
+			wait_magic(ctx);
+			for (i = 0; i < HALF_DMA_BUF; i++) {
+				rte_memcpy((void *)(uintptr_t)(dst +
+						n * ctx->dma_buf_size),
+					ctx->dma_buf[n % NUM_DMA_BUF],
+					ctx->dma_buf_size);
+				n++;
+			}
+			*wf_issued = 0;
+			*rx_count = n;
+		}
+		/* fence the chunks that are still outstanding */
+		ret = issue_magic(ctx);
+		if (ret) {
+			AFU_MF_PMD_DEBUG("Magic issue failed");
+			return ret;
+		}
+		*wf_issued = 1;
+	}
+
+	return ret;
+}
+
+/*
+ * Transfer 'count' bytes from FPGA memory (address 'src') to host memory
+ * (virtual address 'dst').
+ *
+ * Stages, in order:
+ *  1. if src is not DMA aligned, the bytes up to the next DMA_ALIGN_BYTES
+ *     boundary (or the whole transfer when it is shorter than
+ *     DMA_ALIGN_BYTES) are read through the ASE path;
+ *  2. whole dma_buf_size chunks are received with dma_rx_buf(), then the
+ *     bounce buffers not yet copied out are copied to dst;
+ *  3. the remaining multiple of DMA_ALIGN_BYTES is received into dma_buf[0]
+ *     with one DMA followed by a magic fence, then copied to dst;
+ *  4. the final bytes are read through the ASE path again.
+ *
+ * Returns 0 on success, -EINVAL if ctx is NULL, or the first error reported
+ * by a helper.
+ */
+static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t aligned_addr = 0;
+	uint64_t align_bytes = 0;
+	uint64_t dma_chunks = 0;
+	uint64_t pending_buf = 0;
+	uint64_t dma_rx_bytes = 0;
+	uint64_t offset = 0;
+	int wf_issued = 0;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* stage 1: bring src up to a DMA boundary via ASE */
+	if (!IS_DMA_ALIGNED(src)) {
+		if (count_left < DMA_ALIGN_BYTES)
+			return ase_fpga_to_host(ctx, &src, &dst, count_left);
+
+		aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
+			 * DMA_ALIGN_BYTES;
+		align_bytes = aligned_addr - src;
+		/* src and dst are advanced by the helper */
+		ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
+		if (ret)
+			return ret;
+		count_left = count_left - align_bytes;
+	}
+
+	if (count_left) {
+		/* stage 2: full-size chunks; 'offset' is the bytes they cover */
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = dma_rx_buf(ctx, dst, src, i,
+				i == (dma_chunks - 1),
+				&pending_buf, &wf_issued);
+			if (ret)
+				return ret;
+		}
+
+		/* wait for the fence left outstanding by dma_rx_buf() */
+		if (wf_issued)
+			wait_magic(ctx);
+
+		/* clear out final dma memcpy operations */
+		while (pending_buf < dma_chunks) {
+			/* constant size transfer; no length check required */
+			rte_memcpy((void *)(uintptr_t)(dst +
+					pending_buf * ctx->dma_buf_size),
+				ctx->dma_buf[pending_buf % NUM_DMA_BUF],
+				ctx->dma_buf_size);
+			pending_buf++;
+		}
+
+		if (count_left > 0) {
+			/* stage 3: DMA-aligned part of the remainder */
+			i = count_left / DMA_ALIGN_BYTES;
+			if (i > 0) {
+				dma_rx_bytes = i * DMA_ALIGN_BYTES;
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
+					dma_rx_bytes);
+				ret = do_dma(ctx,
+					DMA_HOST_ADDR(ctx->dma_iova[0]),
+					src + offset,
+					dma_rx_bytes, 1, FPGA_TO_HOST, 0);
+				if (ret)
+					return ret;
+				ret = issue_magic(ctx);
+				if (ret)
+					return ret;
+				wait_magic(ctx);
+				rte_memcpy((void *)(uintptr_t)(dst + offset),
+					ctx->dma_buf[0], dma_rx_bytes);
+			}
+
+			/* stage 4: bytes too few for DMA go through ASE */
+			count_left -= dma_rx_bytes;
+			if (count_left) {
+				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
+					count_left);
+				dst += offset + dma_rx_bytes;
+				src += offset + dma_rx_bytes;
+				ret = ase_fpga_to_host(ctx, &src, &dst,
+							count_left);
+			}
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Copy 'count' bytes between two FPGA memory addresses.
+ *
+ * When src, dst and count are all DMA aligned the copy is done directly by
+ * the DMA engine in dma_buf_size chunks, fenced with magic transfers.
+ * Otherwise the data is bounced through a temporary host buffer, chunk by
+ * chunk, using the FPGA-to-host and host-to-FPGA paths; that path rejects a
+ * destination that starts inside the source range.
+ *
+ * @return 0 on success, -EINVAL on bad arguments or overlap, -ENOMEM if the
+ *         bounce buffer cannot be allocated, or the first helper error
+ */
+static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
+	size_t count)
+{
+	uint64_t i = 0;
+	uint64_t count_left = count;
+	uint64_t dma_chunks = 0;
+	uint64_t offset = 0;
+	uint32_t tx_chunks = 0;
+	uint64_t *tmp_buf = NULL;
+	int ret = 0;
+
+	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
+		count);
+
+	if (!ctx)
+		return -EINVAL;
+
+	if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
+	    && IS_DMA_ALIGNED(count_left)) {
+		dma_chunks = count_left / ctx->dma_buf_size;
+		offset = dma_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
+			" (%"PRIu64"...0x%"PRIx64")",
+			src, dst, dma_chunks, count_left);
+		for (i = 0; i < dma_chunks; i++) {
+			ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
+				src + i * ctx->dma_buf_size,
+				ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
+			if (ret)
+				return ret;
+			/* fence after every NUM_DMA_BUF chunks and at the end */
+			if ((((i + 1) % NUM_DMA_BUF) == 0) ||
+				(i == (dma_chunks - 1))) {
+				ret = issue_magic(ctx);
+				if (ret)
+					return ret;
+				wait_magic(ctx);
+			}
+		}
+
+		if (count_left > 0) {
+			AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to DMA", count_left);
+			ret = do_dma(ctx, dst + offset, src + offset,
+				count_left, 1, FPGA_TO_FPGA, 0);
+			if (ret)
+				return ret;
+			ret = issue_magic(ctx);
+			if (ret)
+				return ret;
+			wait_magic(ctx);
+		}
+	} else {
+		if ((src < dst) && (src + count_left > dst)) {
+			AFU_MF_PMD_ERR("Overlapping: 0x%"PRIx64
+				" -> 0x%"PRIx64" (0x%"PRIx64")",
+				src, dst, count_left);
+			return -EINVAL;
+		}
+		tx_chunks = count_left / ctx->dma_buf_size;
+		offset = tx_chunks * ctx->dma_buf_size;
+		count_left -= offset;
+		AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
+			" (%u...0x%"PRIx64")",
+			src, dst, tx_chunks, count_left);
+		tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
+			DMA_ALIGN_BYTES);
+		/* the helpers below would copy through a NULL pointer */
+		if (!tmp_buf)
+			return -ENOMEM;
+		for (i = 0; i < tx_chunks; i++) {
+			ret = dma_fpga_to_host(ctx, (uint64_t)(uintptr_t)tmp_buf,
+				src + i * ctx->dma_buf_size,
+				ctx->dma_buf_size);
+			if (ret)
+				goto free_buf;
+			ret = dma_host_to_fpga(ctx,
+				dst + i * ctx->dma_buf_size,
+				(uint64_t)(uintptr_t)tmp_buf,
+				ctx->dma_buf_size);
+			if (ret)
+				goto free_buf;
+		}
+
+		if (count_left > 0) {
+			ret = dma_fpga_to_host(ctx, (uint64_t)(uintptr_t)tmp_buf,
+				src + offset, count_left);
+			if (ret)
+				goto free_buf;
+			ret = dma_host_to_fpga(ctx, dst + offset,
+				(uint64_t)(uintptr_t)tmp_buf, count_left);
+			if (ret)
+				goto free_buf;
+		}
+free_buf:
+		rte_free(tmp_buf);
+	}
+
+	return ret;
+}
+
+/*
+ * Run one blocking transfer in the requested direction.
+ *
+ * Returns the result of the direction-specific routine, or -EINVAL when
+ * ctx is NULL or the type is not one of the three supported directions.
+ */
+static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
+	uint64_t src, size_t count, fpga_dma_type type)
+{
+	if (!ctx)
+		return -EINVAL;
+
+	switch (type) {
+	case HOST_TO_FPGA:
+		return dma_host_to_fpga(ctx, dst, src, count);
+	case FPGA_TO_HOST:
+		return dma_fpga_to_host(ctx, dst, src, count);
+	case FPGA_TO_FPGA:
+		return dma_fpga_to_fpga(ctx, dst, src, count);
+	default:
+		return -EINVAL;
+	}
+}
+
+/* Elapsed time from 'start' to 'end', in seconds. */
+static double getTime(struct timespec start, struct timespec end)
+{
+	const double nsec_per_sec = (double)1000000000L;
+	uint64_t elapsed_ns = 1000000000L * (end.tv_sec - start.tv_sec)
+		+ end.tv_nsec - start.tv_nsec;
+
+	return (double)elapsed_ns / nsec_per_sec;
+}
+
+#define SWEEP_ITERS 1
+/*
+ * Timed round trip of the test buffer: host -> FPGA, then FPGA -> host,
+ * followed by a verification of the received data.
+ *
+ * The transfer starts 'buf_offset' bytes into the host data buffer and is
+ * 'length - (buf_offset + size_decrement)' bytes long, which lets callers
+ * exercise unaligned addresses and sizes. The measured bandwidth of each
+ * direction is printed.
+ *
+ * Returns 0 on success, -EINVAL on bad parameters, or the first error from
+ * the transfer or verification.
+ */
+static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
+	uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
+{
+	struct timespec t0, t1;
+	uint64_t nbytes = 0;
+	uint64_t *host_buf = NULL;
+	double mbps, elapsed = 0.0;
+	int iter = 0;
+	int ret = 0;
+
+	if (!ctx || !ctx->data_buf || !ctx->ref_buf) {
+		AFU_MF_PMD_ERR("Buffer for DMA test is not allocated");
+		return -EINVAL;
+	}
+
+	if (length < (buf_offset + size_decrement)) {
+		AFU_MF_PMD_ERR("Test length does not match unaligned parameter");
+		return -EINVAL;
+	}
+	nbytes = length - (buf_offset + size_decrement);
+	if ((ddr_offset + nbytes) > ctx->mem_size) {
+		AFU_MF_PMD_ERR("Test is out of DDR memory space");
+		return -EINVAL;
+	}
+
+	host_buf = (uint64_t *)((uint8_t *)ctx->data_buf + buf_offset);
+
+	/* host -> FPGA */
+	printf("Sweep Host %p to FPGA 0x%"PRIx64
+		" with 0x%"PRIx64" bytes ...\n",
+		(void *)host_buf, ddr_offset, nbytes);
+
+	iter = 0;
+	while (iter < SWEEP_ITERS) {
+		clock_gettime(CLOCK_MONOTONIC, &t0);
+		ret = dma_transfer_sync(ctx, ddr_offset, (uint64_t)host_buf,
+			nbytes, HOST_TO_FPGA);
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed");
+			return ret;
+		}
+		elapsed += getTime(t0, t1);
+		iter++;
+	}
+	mbps = (nbytes * SWEEP_ITERS) / (elapsed * 1000000);
+	printf("Measured bandwidth = %lf MB/s\n", mbps);
+
+	/* FPGA -> host, into a zeroed buffer so stale data cannot pass */
+	printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n",
+		ddr_offset, (void *)host_buf, nbytes);
+
+	elapsed = 0.0;
+	memset((char *)host_buf, 0, nbytes);
+	iter = 0;
+	while (iter < SWEEP_ITERS) {
+		clock_gettime(CLOCK_MONOTONIC, &t0);
+		ret = dma_transfer_sync(ctx, (uint64_t)host_buf, ddr_offset,
+			nbytes, FPGA_TO_HOST);
+		clock_gettime(CLOCK_MONOTONIC, &t1);
+		if (ret) {
+			AFU_MF_PMD_ERR("Failed");
+			return ret;
+		}
+		elapsed += getTime(t0, t1);
+		iter++;
+	}
+	mbps = (nbytes * SWEEP_ITERS) / (elapsed * 1000000);
+	printf("Measured bandwidth = %lf MB/s\n", mbps);
+
+	printf("Verifying buffer ...\n");
+	return dma_afu_buf_verify(ctx, nbytes);
+}
+
+/*
+ * Run the DMA AFU self test described by the device's DMA configuration.
+ *
+ * Sequence: allocate and fill the test buffers, enable the DMA interrupt,
+ * host -> FPGA -> host round trip with verification, an FPGA -> FPGA copy
+ * to a second region (skipped when no non-overlapping region is available)
+ * read back and verified, then aligned and optionally unaligned sweep
+ * tests. The interrupt is disabled and the buffers are freed on exit.
+ *
+ * @return 0 on success, -EINVAL on a bad device or DMA index, -ENOENT when
+ *         the device has no private data, or the first error of a test step
+ */
+static int dma_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	struct rte_pmd_afu_dma_cfg *cfg = NULL;
+	msgdma_ctrl ctrl;
+	uint64_t offset = 0;
+	uint64_t region_start = 0;
+	uint64_t region_len = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->priv)
+		return -ENOENT;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	cfg = &priv->dma_cfg;
+	if (cfg->index >= NUM_N3000_DMA)
+		return -EINVAL;
+	ctx = &priv->dma_ctx[cfg->index];
+
+	ctx->pattern = (int)cfg->pattern;
+	ctx->verbose = (int)cfg->verbose;
+	ctx->dma_buf_size = cfg->size;
+
+	ret = dma_afu_buf_alloc(ctx, cfg);
+	if (ret)
+		goto free;
+
+	printf("Initialize test buffer\n");
+	dma_afu_buf_init(ctx, cfg->length);
+
+	/* enable interrupt */
+	ctrl.csr = 0;
+	ctrl.global_intr_en_mask = 1;
+	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+	printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf,
+		cfg->offset, cfg->length);
+	ret = dma_transfer_sync(ctx, cfg->offset, (uint64_t)ctx->data_buf,
+		cfg->length, HOST_TO_FPGA);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from host to FPGA");
+		goto end;
+	}
+	memset(ctx->data_buf, 0, cfg->length);
+
+	printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
+		ctx->data_buf, cfg->length);
+	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, cfg->offset,
+		cfg->length, FPGA_TO_HOST);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+		goto end;
+	}
+	ret = dma_afu_buf_verify(ctx, cfg->length);
+	if (ret)
+		goto end;
+
+	/*
+	 * Pick the destination of the FPGA to FPGA copy. The sums are formed
+	 * in 64 bits: computed in the narrower type of the configuration
+	 * fields they could wrap and wrongly pass the memory size check.
+	 */
+	region_start = cfg->offset;
+	region_len = cfg->length;
+	if ((region_start + region_len * 2) <= ctx->mem_size)
+		offset = region_start + region_len;
+	else if (cfg->offset > cfg->length)
+		offset = 0;
+	else
+		goto end;
+
+	printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
+		cfg->offset, offset, cfg->length);
+	ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length,
+		FPGA_TO_FPGA);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to FPGA");
+		goto end;
+	}
+
+	printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
+		ctx->data_buf, cfg->length);
+	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, offset,
+		cfg->length, FPGA_TO_HOST);
+	if (ret) {
+		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to host");
+		goto end;
+	}
+	ret = dma_afu_buf_verify(ctx, cfg->length);
+	if (ret)
+		goto end;
+
+	printf("Sweep with aligned address and size\n");
+	ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
+	if (ret)
+		goto end;
+
+	if (cfg->unaligned) {
+		printf("Sweep with unaligned address and size\n");
+		/* {host buffer offset, bytes trimmed from the length} */
+		struct unaligned_set {
+			uint64_t addr_offset;
+			uint64_t size_dec;
+		} param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
+		for (i = 0; i < ARRAY_SIZE(param); i++) {
+			ret = sweep_test(ctx, cfg->length, cfg->offset,
+				param[i].addr_offset, param[i].size_dec);
+			if (ret)
+				break;
+		}
+	}
+
+end:
+	/* disable interrupt */
+	ctrl.global_intr_en_mask = 0;
+	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
+
+free:
+	dma_afu_buf_free(ctx);
+	return ret;
+}
+
+/*
+ * Resolve the PCI device behind an AFU raw device.
+ *
+ * Follows dev -> rawdev -> AFU device -> its rawdev -> PCI device and
+ * returns NULL as soon as one link of that chain is missing.
+ */
+static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_mf_rawdev *dev)
+{
+	struct rte_afu_device *afu = NULL;
+
+	if (!dev)
+		return NULL;
+	if (!dev->rawdev || !dev->rawdev->device)
+		return NULL;
+
+	afu = RTE_DEV_TO_AFU(dev->rawdev->device);
+	if (afu->rawdev && afu->rawdev->device)
+		return RTE_DEV_TO_PCI(afu->rawdev->device);
+
+	return NULL;
+}
+
+#ifdef VFIO_PRESENT
+/*
+ * Bind eventfds to a range of MSI-X vectors through VFIO.
+ *
+ * Builds a VFIO_DEVICE_SET_IRQS request that attaches the 'count' file
+ * descriptors in 'efds' to the vectors starting at 'vec_start' and submits
+ * it on the VFIO device fd of the underlying PCI device.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments, -ENODEV when the PCI
+ * device cannot be resolved, -ENOMEM on allocation failure, or the ioctl
+ * return value on failure.
+ */
+static int dma_afu_set_irqs(struct afu_mf_rawdev *dev, uint32_t vec_start,
+	uint32_t count, int *efds)
+{
+	struct rte_pci_device *pci_dev = NULL;
+	struct vfio_irq_set *req = NULL;
+	size_t fds_len = 0;
+	size_t req_len = 0;
+	int vfio_fd = 0;
+	int ret = 0;
+
+	if (!dev || !efds)
+		return -EINVAL;
+	if ((count == 0) || (count > MAX_MSIX_VEC))
+		return -EINVAL;
+
+	pci_dev = n3000_afu_get_pci_dev(dev);
+	if (!pci_dev)
+		return -ENODEV;
+	vfio_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
+
+	/* request header followed by one eventfd per vector */
+	fds_len = sizeof(*efds) * count;
+	req_len = sizeof(*req) + fds_len;
+	req = rte_zmalloc(NULL, req_len, 0);
+	if (!req)
+		return -ENOMEM;
+
+	req->argsz = (uint32_t)req_len;
+	req->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+		VFIO_IRQ_SET_ACTION_TRIGGER;
+	req->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	req->start = vec_start;
+	req->count = count;
+	rte_memcpy(&req->data, efds, fds_len);
+
+	ret = ioctl(vfio_fd, VFIO_DEVICE_SET_IRQS, req);
+	if (ret)
+		AFU_MF_PMD_ERR("Error enabling MSI-X interrupts\n");
+
+	rte_free(req);
+	return ret;
+}
+#endif
+
+/*
+ * Locate the mapped address of the FIU port this device belongs to.
+ *
+ * Reads the port attribute register from BAR 0 and, if the port is
+ * implemented, returns the BAR address it names plus the port offset.
+ * Returns NULL when the PCI device cannot be resolved, the port is not
+ * implemented, or the BAR index is out of range.
+ */
+static void *n3000_afu_get_port_addr(struct afu_mf_rawdev *dev)
+{
+	struct rte_pci_device *pci_dev = n3000_afu_get_pci_dev(dev);
+	uint8_t *bar0 = NULL;
+	uint8_t *port_bar = NULL;
+	uint64_t attr = 0;
+	uint32_t bar_idx = 0;
+
+	if (!pci_dev)
+		return NULL;
+
+	bar0 = (uint8_t *)pci_dev->mem_resource[0].addr;
+	attr = rte_read64(bar0 + PORT_ATTR_REG(dev->port));
+	if (!PORT_IMPLEMENTED(attr)) {
+		AFU_MF_PMD_INFO("FIU port %d is not implemented", dev->port);
+		return NULL;
+	}
+
+	bar_idx = PORT_BAR(attr);
+	if (bar_idx >= PCI_MAX_RESOURCE) {
+		AFU_MF_PMD_ERR("BAR index %u is out of limit", bar_idx);
+		return NULL;
+	}
+
+	port_bar = (uint8_t *)pci_dev->mem_resource[bar_idx].addr;
+	return port_bar + PORT_OFFSET(attr);
+}
+
+/*
+ * Look up the user interrupt capability of the port.
+ *
+ * Walks the feature header list starting at the port address until the
+ * private feature with the user interrupt ID is found, then reports the
+ * first vector and vector count from its capability register. Either
+ * output pointer may be NULL.
+ *
+ * Returns 0 when the feature is found, -ENOENT otherwise.
+ */
+static int n3000_afu_get_irq_capability(struct afu_mf_rawdev *dev,
+	uint32_t *vec_start, uint32_t *vec_count)
+{
+	uint8_t *feature = NULL;
+	uint64_t cap = 0;
+	uint64_t dfh = 0;
+	uint64_t next = 0;
+
+	feature = (uint8_t *)n3000_afu_get_port_addr(dev);
+	if (!feature)
+		return -ENOENT;
+
+	for (;;) {
+		dfh = rte_read64(feature);
+		if ((DFH_TYPE(dfh) == DFH_TYPE_PRIVATE) &&
+			(DFH_FEATURE_ID(dfh) == PORT_FEATURE_UINT_ID)) {
+			cap = rte_read64(feature + PORT_UINT_CAP_REG);
+			if (vec_start)
+				*vec_start = PORT_VEC_START(cap);
+			if (vec_count)
+				*vec_count = PORT_VEC_COUNT(cap);
+			return 0;
+		}
+
+		/* stop on an invalid link or on the end-of-list marker */
+		next = DFH_NEXT_OFFSET(dfh);
+		if (((next & 0xffff) == 0xffff) || (next == 0))
+			break;
+		if (DFH_EOL(dfh))
+			break;
+		feature += next;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Free the buffers owned by the NLB context and reset its pointers.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the device has
+ * no private data.
+ */
+static int nlb_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct nlb_afu_ctx *nlb = NULL;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	nlb = &priv->nlb_ctx;
+
+	rte_free(nlb->dsm_ptr);
+	rte_free(nlb->src_ptr);
+	rte_free(nlb->dest_ptr);
+
+	/* status_ptr pointed into the DSM buffer that was just freed */
+	nlb->status_ptr = NULL;
+	nlb->dsm_ptr = NULL;
+	nlb->src_ptr = NULL;
+	nlb->dest_ptr = NULL;
+
+	return 0;
+}
+
+/*
+ * Set up the NLB context of the device.
+ *
+ * Records the AFU register address and allocates the DSM, source and
+ * destination buffers together with their IOVAs. On any failure everything
+ * allocated so far is released again.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments, -ENOENT if the device has
+ * no private data, -ENOMEM if an allocation or IOVA lookup fails.
+ */
+static int nlb_afu_ctx_init(struct afu_mf_rawdev *dev, uint8_t *addr)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct nlb_afu_ctx *nlb = NULL;
+
+	if (!dev || !addr)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	nlb = &priv->nlb_ctx;
+	nlb->addr = addr;
+
+	/* device status memory */
+	nlb->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+	if (!nlb->dsm_ptr)
+		goto no_mem;
+	nlb->dsm_iova = rte_malloc_virt2iova(nlb->dsm_ptr);
+	if (nlb->dsm_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	/* source buffer */
+	nlb->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (!nlb->src_ptr)
+		goto no_mem;
+	nlb->src_iova = rte_malloc_virt2iova(nlb->src_ptr);
+	if (nlb->src_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	/* destination buffer */
+	nlb->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (!nlb->dest_ptr)
+		goto no_mem;
+	nlb->dest_iova = rte_malloc_virt2iova(nlb->dest_ptr);
+	if (nlb->dest_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	nlb->status_ptr = (struct nlb_dsm_status *)(nlb->dsm_ptr + DSM_STATUS);
+	return 0;
+
+no_mem:
+	nlb_afu_ctx_release(dev);
+	return -ENOMEM;
+}
+
+/*
+ * Release the resources of every DMA context of the device.
+ *
+ * Frees the descriptor and magic buffers of all NUM_N3000_DMA contexts and
+ * closes the interrupt eventfd, which is recorded in context 0 and shared
+ * by the others. All contexts are left with NULL buffers and an invalid
+ * event fd, so the function may be called more than once.
+ *
+ * The private data is zero-allocated, so an event fd of 0 is treated as
+ * "never opened" rather than closing descriptor 0 of the process.
+ *
+ * @return 0 on success, -EINVAL if dev is NULL, -ENOENT if the device has
+ *         no private data
+ */
+static int dma_afu_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *ctx = NULL;
+	int event_fd = -1;
+	int i = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	event_fd = priv->dma_ctx[0].event_fd;
+
+	for (i = 0; i < NUM_N3000_DMA; i++) {
+		ctx = &priv->dma_ctx[i];
+
+		rte_free(ctx->desc_buf);
+		ctx->desc_buf = NULL;
+
+		rte_free(ctx->magic_buf);
+		ctx->magic_buf = NULL;
+
+		/* makes poll_interrupt() reject the context from now on */
+		ctx->event_fd = -1;
+	}
+
+	if (event_fd > 0)
+		close(event_fd);
+
+	return 0;
+}
+
+/*
+ * Set up DMA context 'index' of the device.
+ *
+ * Derives the register addresses from the AFU base address, picks the
+ * memory size for this DMA instance, and allocates the descriptor and
+ * magic buffers. For index 0 it also creates the interrupt eventfd and,
+ * when VFIO is present, binds it to the first user interrupt vector; the
+ * eventfd is kept in a static so later contexts reuse the same descriptor.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments, -ENOENT if the device has
+ * no private data, -EBADF if the eventfd cannot be created, -ENOMEM if an
+ * allocation or IOVA lookup fails, or the error from the interrupt
+ * capability lookup.
+ */
+static int dma_afu_ctx_init(struct afu_mf_rawdev *dev, int index, uint8_t *addr)
+{
+	uint64_t mem_sz[] = {0x100000000, 0x100000000, 0x40000000, 0x1000000};
+	static int efds[1] = {0};
+	struct n3000_afu_priv *priv = NULL;
+	struct dma_afu_ctx *dma = NULL;
+	uint32_t first_vec = 0;
+	int ret = 0;
+
+	if (!dev || !addr)
+		return -EINVAL;
+	if ((index < 0) || (index >= NUM_N3000_DMA))
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	dma = &priv->dma_ctx[index];
+	dma->index = index;
+	dma->addr = addr;
+	dma->csr_addr = addr + DMA_CSR;
+	dma->desc_addr = addr + DMA_DESC;
+	dma->ase_ctrl_addr = addr + DMA_ASE_CTRL;
+	dma->ase_data_addr = addr + DMA_ASE_DATA;
+	dma->mem_size = mem_sz[dma->index];
+	dma->cur_ase_page = INVALID_ASE_PAGE;
+
+	/* the interrupt is set up once, by the first DMA instance */
+	if (dma->index == 0) {
+		ret = n3000_afu_get_irq_capability(dev, &first_vec, NULL);
+		if (ret)
+			return ret;
+
+		efds[0] = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+		if (efds[0] < 0) {
+			AFU_MF_PMD_ERR("eventfd create failed");
+			return -EBADF;
+		}
+#ifdef VFIO_PRESENT
+		if (dma_afu_set_irqs(dev, first_vec, 1, efds))
+			AFU_MF_PMD_ERR("DMA interrupt setup failed");
+#endif
+	}
+	dma->event_fd = efds[0];
+
+	dma->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
+		sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
+	if (!dma->desc_buf)
+		goto no_mem;
+
+	dma->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
+		TEST_MEM_ALIGN);
+	if (!dma->magic_buf)
+		goto no_mem;
+
+	dma->magic_iova = rte_malloc_virt2iova(dma->magic_buf);
+	if (dma->magic_iova == RTE_BAD_IOVA)
+		goto no_mem;
+
+	return 0;
+
+no_mem:
+	dma_afu_ctx_release(dev);
+	return -ENOMEM;
+}
+
+/*
+ * Discover the AFU blocks of the device and initialize their contexts.
+ *
+ * Walks the feature header list starting at the device address. An AFU
+ * header carrying the NLB0 UUID initializes the NLB context; each BBB
+ * header carrying the DMA UUID initializes the next DMA context, up to
+ * NUM_N3000_DMA. Other headers are only logged.
+ *
+ * Returns 0 when the walk completes, -EINVAL if dev is NULL, -ENOENT if
+ * the device has no private data, or the error of a context init.
+ */
+static int n3000_afu_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv = NULL;
+	uint8_t *feature = NULL;
+	uint64_t dfh = 0;
+	uint64_t id_hi = 0;
+	uint64_t id_lo = 0;
+	uint64_t next = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	feature = (uint8_t *)dev->addr;
+	for (;;) {
+		dfh = rte_read64(feature);
+		id_lo = rte_read64(feature + DFH_UUID_L_OFFSET);
+		id_hi = rte_read64(feature + DFH_UUID_H_OFFSET);
+
+		if ((DFH_TYPE(dfh) == DFH_TYPE_AFU) &&
+			(id_lo == N3000_NLB0_UUID_L) &&
+			(id_hi == N3000_NLB0_UUID_H)) {
+			AFU_MF_PMD_INFO("AFU NLB0 found @ %p", (void *)feature);
+			ret = nlb_afu_ctx_init(dev, feature);
+			if (ret)
+				return ret;
+		} else if ((DFH_TYPE(dfh) == DFH_TYPE_BBB) &&
+			(id_lo == N3000_DMA_UUID_L) &&
+			(id_hi == N3000_DMA_UUID_H) &&
+			(priv->num_dma < NUM_N3000_DMA)) {
+			AFU_MF_PMD_INFO("AFU DMA%d found @ %p",
+				priv->num_dma, (void *)feature);
+			ret = dma_afu_ctx_init(dev, priv->num_dma, feature);
+			if (ret)
+				return ret;
+			priv->num_dma++;
+		} else {
+			AFU_MF_PMD_DEBUG("DFH: type %"PRIu64
+				", uuid %016"PRIx64"%016"PRIx64,
+				DFH_TYPE(dfh), id_hi, id_lo);
+		}
+
+		/* stop on an invalid link or on the end-of-list marker */
+		next = DFH_NEXT_OFFSET(dfh);
+		if (((next & 0xffff) == 0xffff) || (next == 0))
+			break;
+		if (DFH_EOL(dfh))
+			break;
+		feature += next;
+	}
+
+	return 0;
+}
+
+/*
+ * Driver init hook: make sure the device has a zeroed n3000_afu_priv
+ * area, then discover and initialize the NLB/DMA contexts.
+ *
+ * Returns -EINVAL when dev is NULL, -ENOMEM when the private area cannot
+ * be allocated, otherwise the result of n3000_afu_ctx_init().
+ */
+static int n3000_afu_init(struct afu_mf_rawdev *dev)
+{
+	void *priv;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* an already present private area is reused as is */
+	if (dev->priv == NULL) {
+		priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0);
+		if (priv == NULL)
+			return -ENOMEM;
+		dev->priv = priv;
+	}
+
+	return n3000_afu_ctx_init(dev);
+}
+
+/*
+ * Validate a test configuration and store it in the device private data.
+ *
+ * dev:         raw device, must already be initialized
+ * config:      pointer to a struct rte_pmd_afu_n3000_cfg
+ * config_size: must equal sizeof(struct rte_pmd_afu_n3000_cfg)
+ *
+ * For an NLB configuration only loopback mode is accepted and every
+ * selector has to be inside its enumerated range. For a DMA
+ * configuration the window [offset, offset + length) has to fit in the
+ * memory of the selected controller. The length of a DMA3 (QDR) request
+ * is rounded down to a multiple of 64, in the caller's structure as well.
+ *
+ * Returns 0 on success, -EINVAL on an invalid argument or value,
+ * -ENOENT when the device has no private data.
+ */
+static int n3000_afu_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct n3000_afu_priv *priv = NULL;
+	struct rte_pmd_afu_n3000_cfg *cfg = NULL;
+	int i = 0;
+	uint64_t top = 0;
+
+	if (!dev || !config || !config_size)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
+		return -EINVAL;
+
+	cfg = (struct rte_pmd_afu_n3000_cfg *)config;
+	if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
+		if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
+			return -EINVAL;
+		if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
+			(cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
+			return -EINVAL;
+		if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
+			return -EINVAL;
+		if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
+			return -EINVAL;
+		if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
+			return -EINVAL;
+		if ((cfg->nlb_cfg.multi_cl != 1) &&
+			(cfg->nlb_cfg.multi_cl != 2) &&
+			(cfg->nlb_cfg.multi_cl != 4))
+			return -EINVAL;
+		if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
+			(cfg->nlb_cfg.begin > MAX_CACHE_LINES))
+			return -EINVAL;
+		if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
+			(cfg->nlb_cfg.end > MAX_CACHE_LINES))
+			return -EINVAL;
+		rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
+			sizeof(struct rte_pmd_afu_nlb_cfg));
+	} else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
+		if (cfg->dma_cfg.index >= NUM_N3000_DMA)
+			return -EINVAL;
+		i = cfg->dma_cfg.index;
+		if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
+			return -EINVAL;
+		if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
+			return -EINVAL;
+		/*
+		 * length and offset are both 32-bit: widen before adding so
+		 * the end of the window cannot wrap around and slip past the
+		 * bound check below.
+		 */
+		top = (uint64_t)cfg->dma_cfg.length + cfg->dma_cfg.offset;
+		if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
+			return -EINVAL;
+		if (i == 3) {  /* QDR connected to DMA3 */
+			if (cfg->dma_cfg.length & 0x3f) {
+				cfg->dma_cfg.length &= ~0x3f;
+				AFU_MF_PMD_INFO("Round size to %x for QDR",
+					cfg->dma_cfg.length);
+			}
+		}
+		rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
+			sizeof(struct rte_pmd_afu_dma_cfg));
+	} else {
+		AFU_MF_PMD_ERR("Invalid type of N3000 AFU");
+		return -EINVAL;
+	}
+
+	/* remember which of the two stored configurations is current */
+	priv->cfg_type = cfg->type;
+	return 0;
+}
+
+/*
+ * Run the test that matches the most recently stored configuration.
+ *
+ * Returns -EINVAL when dev is NULL or nothing has been configured yet,
+ * -ENOENT when the device has no private data, otherwise the result of
+ * nlb_afu_test() or dma_afu_test().
+ */
+static int n3000_afu_test(struct afu_mf_rawdev *dev)
+{
+	struct n3000_afu_priv *priv;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	switch (priv->cfg_type) {
+	case RTE_PMD_AFU_N3000_NLB:
+		AFU_MF_PMD_INFO("Test NLB");
+		return nlb_afu_test(dev);
+	case RTE_PMD_AFU_N3000_DMA:
+		AFU_MF_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
+		return dma_afu_test(dev);
+	default:
+		AFU_MF_PMD_ERR("Please configure AFU before test");
+		return -EINVAL;
+	}
+}
+
+/*
+ * Driver close hook: release the NLB and DMA contexts, then free the
+ * private data of the device.
+ *
+ * Returns 0, or -EINVAL when dev is NULL.
+ */
+static int n3000_afu_close(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* the results of the release helpers are not propagated */
+	(void)nlb_afu_ctx_release(dev);
+	(void)dma_afu_ctx_release(dev);
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+	return 0;
+}
+
+/*
+ * Print the addresses held by the context of the currently configured
+ * test (NLB or DMA) to f, or to stdout when f is NULL.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL or no test type has been
+ * configured, -ENOENT when the device has no private data.
+ */
+static int n3000_afu_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct n3000_afu_priv *priv;
+	FILE *out;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct n3000_afu_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+
+	switch (priv->cfg_type) {
+	case RTE_PMD_AFU_N3000_NLB: {
+		const struct nlb_afu_ctx *nlb = &priv->nlb_ctx;
+
+		fprintf(out, "addr:\t\t%p\n", (void *)nlb->addr);
+		fprintf(out, "dsm_ptr:\t%p\n", (void *)nlb->dsm_ptr);
+		fprintf(out, "dsm_iova:\t0x%"PRIx64"\n", nlb->dsm_iova);
+		fprintf(out, "src_ptr:\t%p\n", (void *)nlb->src_ptr);
+		fprintf(out, "src_iova:\t0x%"PRIx64"\n", nlb->src_iova);
+		fprintf(out, "dest_ptr:\t%p\n", (void *)nlb->dest_ptr);
+		fprintf(out, "dest_iova:\t0x%"PRIx64"\n", nlb->dest_iova);
+		fprintf(out, "status_ptr:\t%p\n", (void *)nlb->status_ptr);
+		break;
+	}
+	case RTE_PMD_AFU_N3000_DMA: {
+		/* dump the controller selected by the stored DMA config */
+		const struct dma_afu_ctx *dma =
+			&priv->dma_ctx[priv->dma_cfg.index];
+
+		fprintf(out, "index:\t\t%d\n", dma->index);
+		fprintf(out, "addr:\t\t%p\n", (void *)dma->addr);
+		fprintf(out, "csr_addr:\t%p\n", (void *)dma->csr_addr);
+		fprintf(out, "desc_addr:\t%p\n", (void *)dma->desc_addr);
+		fprintf(out, "ase_ctrl_addr:\t%p\n",
+			(void *)dma->ase_ctrl_addr);
+		fprintf(out, "ase_data_addr:\t%p\n",
+			(void *)dma->ase_data_addr);
+		fprintf(out, "desc_buf:\t%p\n", (void *)dma->desc_buf);
+		fprintf(out, "magic_buf:\t%p\n", (void *)dma->magic_buf);
+		fprintf(out, "magic_iova:\t0x%"PRIx64"\n", dma->magic_iova);
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Driver reset hook: pulse the soft-reset bit of the port control
+ * register (set PORT_SOFT_RESET, wait 100 us, clear it again).
+ *
+ * Returns 0 on success, -ENOENT when the port address cannot be obtained.
+ */
+static int n3000_afu_reset(struct afu_mf_rawdev *dev)
+{
+	uint8_t *addr = NULL;
+	uint64_t val = 0;
+
+	/*
+	 * NOTE(review): dev is not checked here; presumably
+	 * n3000_afu_get_port_addr() returns NULL for a NULL dev - confirm.
+	 */
+	addr = (uint8_t *)n3000_afu_get_port_addr(dev);
+	if (!addr)
+		return -ENOENT;
+
+	/* read-modify-write so the other control bits are kept */
+	val = rte_read64(addr + PORT_CTRL_REG);
+	val |= PORT_SOFT_RESET;
+	rte_write64(val, addr + PORT_CTRL_REG);
+	rte_delay_us(100);
+	val &= ~PORT_SOFT_RESET;
+	rte_write64(val, addr + PORT_CTRL_REG);
+
+	return 0;
+}
+
+/* Operations of the N3000 AFU driver; start and stop are not provided. */
+static struct afu_mf_ops n3000_afu_ops = {
+	.init = n3000_afu_init,
+	.config = n3000_afu_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = n3000_afu_test,
+	.close = n3000_afu_close,
+	.dump = n3000_afu_dump,
+	.reset = n3000_afu_reset
+};
+
+/* Driver entry exported to the AFU table: N3000 AFU UUID plus its ops. */
+struct afu_mf_drv n3000_afu_drv = {
+	.uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+	.ops = &n3000_afu_ops
+};
diff --git a/drivers/raw/afu_mf/n3000_afu.h b/drivers/raw/afu_mf/n3000_afu.h
new file mode 100644
index 0000000..4c740da
--- /dev/null
+++ b/drivers/raw/afu_mf/n3000_afu.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _N3000_AFU_H_
+#define _N3000_AFU_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define N3000_AFU_UUID_L  0xc000c9660d824272
+#define N3000_AFU_UUID_H  0x9aeffe5f84570612
+#define N3000_NLB0_UUID_L 0xf89e433683f9040b
+#define N3000_NLB0_UUID_H 0xd8424dc4a4a3c413
+#define N3000_DMA_UUID_L  0xa9149a35bace01ea
+#define N3000_DMA_UUID_H  0xef82def7f6ec40fc
+
+extern struct afu_mf_drv n3000_afu_drv;
+
+#define NUM_N3000_DMA  4
+#define MAX_MSIX_VEC   7
+
+/*
+ * N3000 DFL definition
+ *
+ * DFH_* macros decode a 64-bit device feature header, PORT_* macros
+ * decode the port attribute, interrupt capability and control registers.
+ */
+#define DFH_UUID_L_OFFSET  8
+#define DFH_UUID_H_OFFSET  16
+#define DFH_TYPE(hdr)  (((hdr) >> 60) & 0xf)
+#define DFH_TYPE_AFU  1
+#define DFH_TYPE_BBB  2
+#define DFH_TYPE_PRIVATE  3
+#define DFH_EOL(hdr)  (((hdr) >> 40) & 0x1)
+#define DFH_NEXT_OFFSET(hdr)  (((hdr) >> 16) & 0xffffff)
+#define DFH_FEATURE_ID(hdr)  ((hdr) & 0xfff)
+#define PORT_ATTR_REG(n)  (((n) << 3) + 0x38)
+#define PORT_IMPLEMENTED(attr)  (((attr) >> 60) & 0x1)
+#define PORT_BAR(attr)  (((attr) >> 32) & 0x7)
+#define PORT_OFFSET(attr)  ((attr) & 0xffffff)
+#define PORT_FEATURE_UINT_ID  0x12
+#define PORT_UINT_CAP_REG  0x8
+/* interrupt capability: bits [23:12] first vector, bits [11:0] count */
+#define PORT_VEC_START(cap)  (((cap) >> 12) & 0xfff)
+#define PORT_VEC_COUNT(cap)  ((cap) & 0xfff)
+#define PORT_CTRL_REG  0x38
+#define PORT_SOFT_RESET  (0x1 << 0)
+
+/* NLB registers definition */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+#define DSM_SIZE           0x200000
+#define DSM_STATUS         0x40
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+#define NLB_BUF_SIZE  0x400000
+#define TEST_MEM_ALIGN  1024
+
+struct nlb_csr_ctl {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t reset:1;
+			uint32_t start:1;
+			uint32_t force_completion:1;
+			uint32_t reserved:29;
+		};
+	};
+};
+
+struct nlb_csr_cfg {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t wrthru_en:1;
+			uint32_t cont:1;
+			uint32_t mode:3;
+			uint32_t multicl_len:2;
+			uint32_t rsvd1:1;
+			uint32_t delay_en:1;
+			uint32_t rdsel:2;
+			uint32_t rsvd2:1;
+			uint32_t chsel:3;
+			uint32_t rsvd3:1;
+			uint32_t wrpush_i:1;
+			uint32_t wr_chsel:3;
+			uint32_t rsvd4:3;
+			uint32_t test_cfg:5;
+			uint32_t interrupt_on_error:1;
+			uint32_t interrupt_testmode:1;
+			uint32_t wrfence_chsel:2;
+		};
+	};
+};
+
+struct nlb_status0 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_writes;
+			uint32_t num_reads;
+		};
+	};
+};
+
+struct nlb_status1 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_pend_writes;
+			uint32_t num_pend_reads;
+		};
+	};
+};
+
+struct nlb_dsm_status {
+	uint32_t test_complete;
+	uint32_t test_error;
+	uint64_t num_clocks;
+	uint32_t num_reads;
+	uint32_t num_writes;
+	uint32_t start_overhead;
+	uint32_t end_overhead;
+};
+
+/* DMA registers definition */
+#define DMA_CSR       0x40
+#define DMA_DESC      0x60
+#define DMA_ASE_CTRL  0x200
+#define DMA_ASE_DATA  0x1000
+
+#define DMA_ASE_WINDOW       4096
+#define DMA_ASE_WINDOW_MASK  ((uint64_t)(DMA_ASE_WINDOW - 1))
+#define INVALID_ASE_PAGE     0xffffffffffffffffULL
+
+#define DMA_WF_MAGIC             0x5772745F53796E63ULL
+#define DMA_WF_MAGIC_ROM         0x1000000000000
+#define DMA_HOST_ADDR(addr)      ((addr) | 0x2000000000000)
+#define DMA_WF_HOST_ADDR(addr)   ((addr) | 0x3000000000000)
+
+#define NUM_DMA_BUF   8
+#define HALF_DMA_BUF  (NUM_DMA_BUF / 2)
+
+#define DMA_MASK_32_BIT 0xFFFFFFFF
+
+#define DMA_CSR_BUSY           0x1
+#define DMA_DESC_BUFFER_EMPTY  0x2
+#define DMA_DESC_BUFFER_FULL   0x4
+
+#define DWORD_BYTES 4
+#define IS_ALIGNED_DWORD(addr) (((addr) % DWORD_BYTES) == 0)
+
+#define QWORD_BYTES 8
+#define IS_ALIGNED_QWORD(addr) (((addr) % QWORD_BYTES) == 0)
+
+#define DMA_ALIGN_BYTES 64
+#define IS_DMA_ALIGNED(addr) (((addr) % DMA_ALIGN_BYTES) == 0)
+
+#define CCIP_ALIGN_BYTES (DMA_ALIGN_BYTES << 2)
+
+#define DMA_TIMEOUT_MSEC  5000
+
+#define MAGIC_BUF_SIZE  64
+#define ERR_CHECK_LIMIT  64
+
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+typedef enum {
+	HOST_TO_FPGA = 0,
+	FPGA_TO_HOST,
+	FPGA_TO_FPGA,
+	FPGA_MAX_TRANSFER_TYPE,
+} fpga_dma_type;
+
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t tx_channel:8;
+		uint32_t generate_sop:1;
+		uint32_t generate_eop:1;
+		uint32_t park_reads:1;
+		uint32_t park_writes:1;
+		uint32_t end_on_eop:1;
+		uint32_t reserved_1:1;
+		uint32_t transfer_irq_en:1;
+		uint32_t early_term_irq_en:1;
+		uint32_t trans_error_irq_en:8;
+		uint32_t early_done_en:1;
+		uint32_t reserved_2:6;
+		uint32_t go:1;
+	};
+} msgdma_desc_ctrl;
+
+typedef struct __rte_packed {
+	uint32_t rd_address;
+	uint32_t wr_address;
+	uint32_t len;
+	uint16_t seq_num;
+	uint8_t rd_burst_count;
+	uint8_t wr_burst_count;
+	uint16_t rd_stride;
+	uint16_t wr_stride;
+	uint32_t rd_address_ext;
+	uint32_t wr_address_ext;
+	msgdma_desc_ctrl control;
+} msgdma_ext_desc;
+
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t busy:1;
+		uint32_t desc_buf_empty:1;
+		uint32_t desc_buf_full:1;
+		uint32_t rsp_buf_empty:1;
+		uint32_t rsp_buf_full:1;
+		uint32_t stopped:1;
+		uint32_t resetting:1;
+		uint32_t stopped_on_error:1;
+		uint32_t stopped_on_early_term:1;
+		uint32_t irq:1;
+		uint32_t reserved:22;
+	};
+} msgdma_status;
+
+/*
+ * Control register of the mSGDMA dispatcher. csr aliases the bit-fields
+ * so the register can be accessed as one 32-bit word; the six control
+ * bits plus the reserved field describe all 32 bits.
+ */
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t stop_dispatcher:1;
+		uint32_t reset_dispatcher:1;
+		uint32_t stop_on_error:1;
+		uint32_t stopped_on_early_term:1;
+		uint32_t global_intr_en_mask:1;
+		uint32_t stop_descriptors:1;
+		uint32_t reserved:26;
+	};
+} msgdma_ctrl;
+
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rd_fill_level:16;
+		uint32_t wr_fill_level:16;
+	};
+} msgdma_fill_level;
+
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rsp_fill_level:16;
+		uint32_t reserved:16;
+	};
+} msgdma_rsp_level;
+
+typedef union {
+	uint32_t csr;
+	struct {
+		uint32_t rd_seq_num:16;
+		uint32_t wr_seq_num:16;
+	};
+} msgdma_seq_num;
+
+typedef struct __rte_packed {
+	msgdma_status status;
+	msgdma_ctrl ctrl;
+	msgdma_fill_level fill_level;
+	msgdma_rsp_level rsp;
+	msgdma_seq_num seq_num;
+} msgdma_csr;
+
+#define CSR_STATUS(csr)   (&(((msgdma_csr *)(csr))->status))
+#define CSR_CONTROL(csr)  (&(((msgdma_csr *)(csr))->ctrl))
+
+/*
+ * Runtime context of the NLB0 AFU: register base plus the DSM, source
+ * and destination buffers, each kept as a pointer/IOVA pair.
+ * NOTE(review): the fields are filled in by nlb_afu_ctx_init(), which is
+ * not visible here - presumably it mirrors the HE-LBK context setup.
+ */
+struct nlb_afu_ctx {
+	uint8_t *addr;
+	uint8_t *dsm_ptr;
+	uint64_t dsm_iova;
+	uint8_t *src_ptr;
+	uint64_t src_iova;
+	uint8_t *dest_ptr;
+	uint64_t dest_iova;
+	struct nlb_dsm_status *status_ptr;
+};
+
+/* Runtime context of one DMA controller, set up by dma_afu_ctx_init(). */
+struct dma_afu_ctx {
+	int index;                /* controller number */
+	uint8_t *addr;            /* base of the register block */
+	uint8_t *csr_addr;        /* addr + DMA_CSR */
+	uint8_t *desc_addr;       /* addr + DMA_DESC */
+	uint8_t *ase_ctrl_addr;   /* addr + DMA_ASE_CTRL */
+	uint8_t *ase_data_addr;   /* addr + DMA_ASE_DATA */
+	uint64_t mem_size;        /* size of the memory behind the controller */
+	uint64_t cur_ase_page;    /* starts as INVALID_ASE_PAGE */
+	int event_fd;             /* eventfd created while setting up DMA0 */
+	int verbose;
+	int pattern;
+	void *data_buf;
+	void *ref_buf;
+	msgdma_ext_desc *desc_buf;  /* one descriptor, DMA_ALIGN_BYTES aligned */
+	uint64_t *magic_buf;      /* MAGIC_BUF_SIZE bytes */
+	uint64_t magic_iova;      /* IOVA of magic_buf */
+	uint32_t dma_buf_size;
+	uint64_t *dma_buf[NUM_DMA_BUF];
+	uint64_t dma_iova[NUM_DMA_BUF];
+};
+
+/* Private data of an N3000 AFU raw device. */
+struct n3000_afu_priv {
+	struct rte_pmd_afu_nlb_cfg nlb_cfg;  /* last accepted NLB config */
+	struct rte_pmd_afu_dma_cfg dma_cfg;  /* last accepted DMA config */
+	struct nlb_afu_ctx nlb_ctx;
+	struct dma_afu_ctx dma_ctx[NUM_N3000_DMA];
+	int num_dma;   /* DMA blocks found while walking the DFH list */
+	int cfg_type;  /* RTE_PMD_AFU_N3000_NLB or _DMA once configured */
+};
+
+#endif /* _N3000_AFU_H_ */
diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h
new file mode 100644
index 0000000..f14a053
--- /dev/null
+++ b/drivers/raw/afu_mf/rte_pmd_afu.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2022 Intel Corporation
+ */
+
+#ifndef __RTE_PMD_AFU_H__
+#define __RTE_PMD_AFU_H__
+
+/**
+ * @file rte_pmd_afu.h
+ *
+ * AFU PMD specific definitions.
+ *
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#define RTE_PMD_AFU_N3000_NLB   1
+#define RTE_PMD_AFU_N3000_DMA   2
+
+#define NLB_MODE_LPBK      0
+#define NLB_MODE_READ      1
+#define NLB_MODE_WRITE     2
+#define NLB_MODE_TRPUT     3
+
+#define NLB_VC_AUTO        0
+#define NLB_VC_VL0         1
+#define NLB_VC_VH0         2
+#define NLB_VC_VH1         3
+#define NLB_VC_RANDOM      4
+
+#define NLB_WRLINE_M       0
+#define NLB_WRLINE_I       1
+#define NLB_WRPUSH_I       2
+
+#define NLB_RDLINE_S       0
+#define NLB_RDLINE_I       1
+#define NLB_RDLINE_MIXED   2
+
+#define MIN_CACHE_LINES   1
+#define MAX_CACHE_LINES   1024
+
+#define MIN_DMA_BUF_SIZE  64
+#define MAX_DMA_BUF_SIZE  (1023 * 1024)
+
+/**
+ * NLB AFU configuration data structure.
+ *
+ * The ranges noted below are the ones enforced when the configuration is
+ * passed to the N3000 driver.
+ */
+struct rte_pmd_afu_nlb_cfg {
+	uint32_t mode;          /* only NLB_MODE_LPBK is accepted */
+	uint32_t begin;         /* MIN_CACHE_LINES .. MAX_CACHE_LINES */
+	uint32_t end;           /* begin .. MAX_CACHE_LINES */
+	uint32_t multi_cl;      /* 1, 2 or 4 */
+	uint32_t cont;
+	uint32_t timeout;
+	uint32_t cache_policy;  /* NLB_WRLINE_M .. NLB_WRPUSH_I */
+	uint32_t cache_hint;    /* NLB_RDLINE_S .. NLB_RDLINE_MIXED */
+	uint32_t read_vc;       /* NLB_VC_AUTO .. NLB_VC_RANDOM */
+	uint32_t write_vc;      /* NLB_VC_AUTO .. NLB_VC_RANDOM */
+	uint32_t wrfence_vc;    /* NLB_VC_AUTO .. NLB_VC_VH1 */
+	uint32_t freq_mhz;
+};
+
+/**
+ * DMA AFU configuration data structure.
+ *
+ * The N3000 driver rejects a configuration whose window
+ * [offset, offset + length) does not fit in the memory of the selected
+ * controller, and rounds length down to a multiple of 64 for index 3.
+ */
+struct rte_pmd_afu_dma_cfg {
+	uint32_t index;     /* index of DMA controller */
+	uint32_t length;    /* total length of data to DMA */
+	uint32_t offset;    /* address offset of target memory */
+	uint32_t size;      /* size of transfer buffer */
+	uint32_t pattern;   /* data pattern to fill in test buffer */
+	uint32_t unaligned; /* use unaligned address or length in sweep test */
+	uint32_t verbose;   /* enable verbose error information in test */
+};
+
+/**
+ * N3000 AFU configuration data structure.
+ */
+struct rte_pmd_afu_n3000_cfg {
+	int type;   /* RTE_PMD_AFU_N3000_NLB or RTE_PMD_AFU_N3000_DMA */
+	union {
+		struct rte_pmd_afu_nlb_cfg nlb_cfg;
+		struct rte_pmd_afu_dma_cfg dma_cfg;
+	};
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __RTE_PMD_AFU_H__ */
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v5 3/5] raw/afu_mf: add HE-LBK AFU driver
  2022-05-27  5:36       ` [PATCH v5 0/5] introduce afu_mf raw " Wei Huang
  2022-05-27  5:37         ` [PATCH v5 1/5] drivers/raw: introduce AFU " Wei Huang
  2022-05-27  5:37         ` [PATCH v5 2/5] raw/afu_mf: add N3000 AFU driver Wei Huang
@ 2022-05-27  5:37         ` Wei Huang
  2022-06-06  1:41           ` Zhang, Tianfei
  2022-05-27  5:37         ` [PATCH v5 4/5] raw/afu_mf: add HE-MEM " Wei Huang
                           ` (3 subsequent siblings)
  6 siblings, 1 reply; 57+ messages in thread
From: Wei Huang @ 2022-05-27  5:37 UTC (permalink / raw)
  To: dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, rosen.xu, tianfei.zhang, qi.z.zhang, Wei Huang

HE-LBK and HE-MEM-LBK are host exerciser modules in OFS FPGA,
HE-LBK is used to test PCI bus and HE-MEM-LBK is used to test
local memory.
This driver initializes the modules and reports the test results.

Signed-off-by: Wei Huang <wei.huang@intel.com>
---
 drivers/raw/afu_mf/afu_mf_rawdev.c |   5 +
 drivers/raw/afu_mf/he_lbk.c        | 427 +++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/he_lbk.h        | 121 +++++++++++
 drivers/raw/afu_mf/meson.build     |   2 +-
 drivers/raw/afu_mf/rte_pmd_afu.h   |  14 ++
 5 files changed, 568 insertions(+), 1 deletion(-)
 create mode 100644 drivers/raw/afu_mf/he_lbk.c
 create mode 100644 drivers/raw/afu_mf/he_lbk.h

diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c
index 7c18f3b..e91eb21 100644
--- a/drivers/raw/afu_mf/afu_mf_rawdev.c
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
@@ -20,16 +20,21 @@
 #include "rte_pmd_afu.h"
 #include "afu_mf_rawdev.h"
 #include "n3000_afu.h"
+#include "he_lbk.h"
 
 #define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
 
 static const struct rte_afu_uuid afu_uuid_map[] = {
 	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
+	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
+	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
 	{ 0, 0 /* sentinel */ }
 };
 
 static struct afu_mf_drv *afu_table[] = {
 	&n3000_afu_drv,
+	&he_lbk_drv,
+	&he_mem_lbk_drv,
 	NULL
 };
 
diff --git a/drivers/raw/afu_mf/he_lbk.c b/drivers/raw/afu_mf/he_lbk.c
new file mode 100644
index 0000000..8735647
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.c
@@ -0,0 +1,427 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_lbk.h"
+
+/*
+ * Translate the stored test configuration into the layout of the CFG
+ * register and write it to the device.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the
+ * device has no private data.
+ */
+static int he_lbk_afu_config(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv;
+	const struct rte_pmd_afu_he_lbk_cfg *cfg;
+	struct he_lbk_csr_cfg reg;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	cfg = &priv->he_lbk_cfg;
+
+	reg.csr = 0;
+	reg.cont = cfg->cont ? 1 : 0;
+	reg.mode = cfg->mode;
+	reg.trput_interleave = cfg->trput_interleave;
+	/* a multi_cl of 4 is encoded as 2, any other value as multi_cl - 1 */
+	reg.multicl_len = (cfg->multi_cl == 4) ? 2 : cfg->multi_cl - 1;
+
+	AFU_MF_PMD_DEBUG("cfg: 0x%08x", reg.csr);
+	rte_write32(reg.csr, priv->he_lbk_ctx.addr + CSR_CFG);
+
+	return 0;
+}
+
+/*
+ * Print the result of one test iteration to stdout: read/write counters,
+ * pending counters, elapsed clocks and the derived bandwidths.
+ *
+ * dev: raw device; nothing is printed when it or its private data is NULL
+ * cl:  number of cache lines used in the iteration being reported
+ *
+ * When the configured clock frequency is 0 it is taken from the low 16
+ * bits of CSR_HE_INFO0, falling back to 350 MHz, and stored in the
+ * configuration for later calls.
+ */
+static void he_lbk_report(struct afu_mf_rawdev *dev, uint32_t cl)
+{
+	struct he_lbk_priv *priv;
+	struct rte_pmd_afu_he_lbk_cfg *cfg;
+	struct he_lbk_ctx *ctx;
+	struct he_lbk_dsm_status *dsm;
+	struct he_lbk_status0 counts;
+	struct he_lbk_status1 pending;
+	uint64_t msg, he_info, ticks;
+	uint32_t overhead;
+	double lines, rd_bw, wr_bw;
+
+	if (dev == NULL || dev->priv == NULL)
+		return;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+	dsm = ctx->status_ptr;
+
+	msg = rte_read64(ctx->addr + CSR_SWTEST_MSG);
+	counts.csr = rte_read64(ctx->addr + CSR_STATUS0);
+	pending.csr = rte_read64(ctx->addr + CSR_STATUS1);
+
+	/* the end overhead is only subtracted outside continuous mode */
+	overhead = dsm->start_overhead;
+	if (!cfg->cont)
+		overhead += dsm->end_overhead;
+	ticks = dsm->num_clocks - overhead;
+
+	if (cfg->freq_mhz == 0) {
+		he_info = rte_read64(ctx->addr + CSR_HE_INFO0);
+		AFU_MF_PMD_INFO("API version: %"PRIx64, he_info >> 16);
+		cfg->freq_mhz = he_info & 0xffff;
+		if (cfg->freq_mhz == 0) {
+			AFU_MF_PMD_INFO("Frequency of AFU clock is unknown."
+				" Assuming 350 MHz.");
+			cfg->freq_mhz = 350;
+		}
+	}
+
+	lines = (double)counts.num_reads;
+	rd_bw = (lines * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+	lines = (double)counts.num_writes;
+	wr_bw = (lines * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
+
+	printf("Cachelines  Read_Count Write_Count Pend_Read Pend_Write "
+		"Clocks@%uMHz   Rd_Bandwidth   Wr_Bandwidth\n",
+		cfg->freq_mhz);
+	printf("%10u  %10u %10u %10u %10u  %12"PRIu64
+		"   %7.3f GB/s   %7.3f GB/s\n",
+		cl, counts.num_reads, counts.num_writes,
+		pending.num_pend_reads, pending.num_pend_writes,
+		ticks, rd_bw / 1e9, wr_bw / 1e9);
+	printf("Test Message: 0x%"PRIx64"\n", msg);
+}
+
+/*
+ * Run the configured test once for every transfer size from cfg->begin
+ * to cfg->end cache lines, stepping by cfg->multi_cl, and print a report
+ * after each iteration.
+ *
+ * In continuous mode the test runs for cfg->timeout seconds and is then
+ * forced to complete; otherwise completion is awaited first. In loopback
+ * mode the destination buffer is compared with the source pattern and a
+ * mismatch is logged.
+ *
+ * Returns 0 when every iteration completed, -EINVAL when dev is NULL,
+ * -ENOENT when the device has no private data, the error of
+ * he_lbk_afu_config(), or the error of dsm_poll_timeout() when the
+ * completion flag is not raised in time.
+ */
+static int he_lbk_test(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv = NULL;
+	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
+	struct he_lbk_ctx *ctx = NULL;
+	struct he_lbk_csr_ctl ctl;
+	uint32_t *ptr = NULL;
+	uint32_t i, j, cl, val = 0;
+	uint64_t sval = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_lbk_cfg;
+	ctx = &priv->he_lbk_ctx;
+
+	/* write the control register with reset = 0, then reset = 1 */
+	ctl.csr = 0;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+	rte_delay_us(1000);
+	ctl.reset = 1;
+	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+	/* initialize DMA addresses */
+	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
+	rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
+
+	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
+	rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
+
+	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
+	rte_write32(SIZE_TO_CLS(ctx->dsm_iova), ctx->addr + CSR_AFU_DSM_BASEL);
+	rte_write32(SIZE_TO_CLS(ctx->dsm_iova) >> 32,
+		ctx->addr + CSR_AFU_DSM_BASEH);
+
+	ret = he_lbk_afu_config(dev);
+	if (ret)
+		return ret;
+
+	/* initialize src data */
+	ptr = (uint32_t *)ctx->src_ptr;
+	j = CLS_TO_SIZE(cfg->end) >> 2;
+	for (i = 0; i < j; i++)
+		*ptr++ = i;
+
+	/* start test */
+	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
+		memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
+		memset(ctx->dsm_ptr, 0, DSM_SIZE);
+
+		ctl.csr = 0;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		rte_delay_us(1000);
+		ctl.reset = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		/* the register takes the number of lines minus one */
+		rte_write32(cl - 1, ctx->addr + CSR_NUM_LINES);
+
+		ctl.start = 1;
+		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+
+		if (cfg->cont) {
+			rte_delay_ms(cfg->timeout * 1000);
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+		} else {
+			ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
+				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
+				DSM_TIMEOUT);
+			if (ret) {
+				printf("DSM poll timeout\n");
+				goto end;
+			}
+			ctl.force_completion = 1;
+			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
+		}
+
+		he_lbk_report(dev, cl);
+
+		/* give pending transactions up to ~100 ms to drain */
+		i = 0;
+		while (i++ < 100) {
+			sval = rte_read64(ctx->addr + CSR_STATUS1);
+			if (sval == 0)
+				break;
+			rte_delay_us(1000);
+		}
+
+		if (cfg->mode == NLB_MODE_LPBK) {
+			ptr = (uint32_t *)ctx->dest_ptr;
+			j = CLS_TO_SIZE(cl) >> 2;
+			for (i = 0; i < j; i++) {
+				if (*ptr++ != i) {
+					AFU_MF_PMD_ERR("Data mismatch @ %u", i);
+					break;
+				}
+			}
+		}
+	}
+
+end:
+	/* ret is 0 after a complete run, the poll error after a timeout */
+	return ret;
+}
+
+/*
+ * Free the DSM, source and destination buffers of the context and clear
+ * the pointers that referred to them. May be called on a partially
+ * initialized context.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the
+ * device has no private data.
+ */
+static int he_lbk_ctx_release(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv;
+	struct he_lbk_ctx *ctx;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	ctx = &priv->he_lbk_ctx;
+
+	/* status_ptr aliases the DSM buffer, so it is cleared with it */
+	rte_free(ctx->dsm_ptr);
+	ctx->dsm_ptr = NULL;
+	ctx->status_ptr = NULL;
+
+	rte_free(ctx->src_ptr);
+	ctx->src_ptr = NULL;
+
+	rte_free(ctx->dest_ptr);
+	ctx->dest_ptr = NULL;
+
+	return 0;
+}
+
+/*
+ * Allocate the DSM, source and destination buffers, resolve their IOVAs
+ * and point status_ptr at the start of the DSM buffer.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the
+ * device has no private data, -ENOMEM when an allocation or an IOVA
+ * lookup fails (everything allocated so far is released again).
+ */
+static int he_lbk_ctx_init(struct afu_mf_rawdev *dev)
+{
+	struct he_lbk_priv *priv;
+	struct he_lbk_ctx *ctx;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	ctx = &priv->he_lbk_ctx;
+	ctx->addr = (uint8_t *)dev->addr;
+
+	ctx->dsm_ptr = rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
+	if (ctx->dsm_ptr == NULL)
+		goto fail;
+	ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr);
+	if (ctx->dsm_iova == RTE_BAD_IOVA)
+		goto fail;
+
+	ctx->src_ptr = rte_zmalloc(NULL, NLB_BUF_SIZE, TEST_MEM_ALIGN);
+	if (ctx->src_ptr == NULL)
+		goto fail;
+	ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr);
+	if (ctx->src_iova == RTE_BAD_IOVA)
+		goto fail;
+
+	ctx->dest_ptr = rte_zmalloc(NULL, NLB_BUF_SIZE, TEST_MEM_ALIGN);
+	if (ctx->dest_ptr == NULL)
+		goto fail;
+	ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr);
+	if (ctx->dest_iova == RTE_BAD_IOVA)
+		goto fail;
+
+	ctx->status_ptr = (struct he_lbk_dsm_status *)ctx->dsm_ptr;
+	return 0;
+
+fail:
+	/* every failure above is reported as out of memory */
+	he_lbk_ctx_release(dev);
+	return -ENOMEM;
+}
+
+/*
+ * Driver init hook: make sure the device has a zeroed he_lbk_priv area,
+ * then set up the test buffers.
+ *
+ * Returns -EINVAL when dev is NULL, -ENOMEM when the private area cannot
+ * be allocated, otherwise the result of he_lbk_ctx_init().
+ */
+static int he_lbk_init(struct afu_mf_rawdev *dev)
+{
+	void *priv;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* an already present private area is reused as is */
+	if (dev->priv == NULL) {
+		priv = rte_zmalloc(NULL, sizeof(struct he_lbk_priv), 0);
+		if (priv == NULL)
+			return -ENOMEM;
+		dev->priv = priv;
+	}
+
+	return he_lbk_ctx_init(dev);
+}
+
+/*
+ * Validate a test configuration and copy it into the private data.
+ *
+ * dev:         raw device, must already be initialized
+ * config:      pointer to a struct rte_pmd_afu_he_lbk_cfg
+ * config_size: must equal sizeof(struct rte_pmd_afu_he_lbk_cfg)
+ *
+ * Returns 0 on success, -EINVAL on an invalid argument or value,
+ * -ENOENT when the device has no private data.
+ */
+static int he_lbk_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	const struct rte_pmd_afu_he_lbk_cfg *req;
+	struct he_lbk_priv *priv;
+
+	if (dev == NULL || config == NULL || config_size == 0)
+		return -EINVAL;
+
+	priv = (struct he_lbk_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	if (config_size != sizeof(*req))
+		return -EINVAL;
+
+	req = (const struct rte_pmd_afu_he_lbk_cfg *)config;
+
+	if (req->mode > NLB_MODE_TRPUT)
+		return -EINVAL;
+
+	/* only 1, 2 or 4 cache lines per request are supported */
+	switch (req->multi_cl) {
+	case 1:
+	case 2:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (req->begin < MIN_CACHE_LINES || req->begin > MAX_CACHE_LINES)
+		return -EINVAL;
+	if (req->end < req->begin || req->end > MAX_CACHE_LINES)
+		return -EINVAL;
+
+	rte_memcpy(&priv->he_lbk_cfg, req, sizeof(priv->he_lbk_cfg));
+	return 0;
+}
+
+/*
+ * Driver close hook: release the test buffers, then free the private
+ * data of the device.
+ *
+ * Returns 0, or -EINVAL when dev is NULL.
+ */
+static int he_lbk_close(struct afu_mf_rawdev *dev)
+{
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* the result of the release helper is not propagated */
+	(void)he_lbk_ctx_release(dev);
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+	return 0;
+}
+
+/*
+ * Print the register base and the buffer pointers/IOVAs of the context
+ * to f, or to stdout when f is NULL.
+ *
+ * Returns 0 on success, -EINVAL when dev is NULL, -ENOENT when the
+ * device has no private data.
+ */
+static int he_lbk_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	const struct he_lbk_priv *priv;
+	const struct he_lbk_ctx *ctx;
+	FILE *out;
+
+	if (dev == NULL)
+		return -EINVAL;
+
+	priv = (const struct he_lbk_priv *)dev->priv;
+	if (priv == NULL)
+		return -ENOENT;
+
+	out = (f != NULL) ? f : stdout;
+	ctx = &priv->he_lbk_ctx;
+
+	fprintf(out, "addr:\t\t%p\n", (void *)ctx->addr);
+	fprintf(out, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
+	fprintf(out, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
+	fprintf(out, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
+	fprintf(out, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
+	fprintf(out, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
+	fprintf(out, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
+	fprintf(out, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
+
+	return 0;
+}
+
+/* Operations shared by HE-LBK and HE-MEM-LBK; no start, stop or reset. */
+static struct afu_mf_ops he_lbk_ops = {
+	.init = he_lbk_init,
+	.config = he_lbk_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_lbk_test,
+	.close = he_lbk_close,
+	.dump = he_lbk_dump,
+	.reset = NULL
+};
+
+/* Driver entry for the HE-LBK UUID. */
+struct afu_mf_drv he_lbk_drv = {
+	.uuid = { HE_LBK_UUID_L, HE_LBK_UUID_H },
+	.ops = &he_lbk_ops
+};
+
+/* Driver entry for the HE-MEM-LBK UUID; it uses the same operations. */
+struct afu_mf_drv he_mem_lbk_drv = {
+	.uuid = { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+	.ops = &he_lbk_ops
+};
diff --git a/drivers/raw/afu_mf/he_lbk.h b/drivers/raw/afu_mf/he_lbk.h
new file mode 100644
index 0000000..c2e8a29
--- /dev/null
+++ b/drivers/raw/afu_mf/he_lbk.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_LBK_H_
+#define _HE_LBK_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define HE_LBK_UUID_L      0xb94b12284c31e02b
+#define HE_LBK_UUID_H      0x56e203e9864f49a7
+#define HE_MEM_LBK_UUID_L  0xbb652a578330a8eb
+#define HE_MEM_LBK_UUID_H  0x8568ab4e6ba54616
+
+extern struct afu_mf_drv he_lbk_drv;
+extern struct afu_mf_drv he_mem_lbk_drv;
+
+/* HE-LBK & HE-MEM-LBK registers definition */
+#define CSR_SCRATCHPAD0    0x100
+#define CSR_SCRATCHPAD1    0x108
+#define CSR_AFU_DSM_BASEL  0x110
+#define CSR_AFU_DSM_BASEH  0x114
+#define CSR_SRC_ADDR       0x120
+#define CSR_DST_ADDR       0x128
+#define CSR_NUM_LINES      0x130
+#define CSR_CTL            0x138
+#define CSR_CFG            0x140
+#define CSR_INACT_THRESH   0x148
+#define CSR_INTERRUPT0     0x150
+#define CSR_SWTEST_MSG     0x158
+#define CSR_STATUS0        0x160
+#define CSR_STATUS1        0x168
+#define CSR_ERROR          0x170
+#define CSR_STRIDE         0x178
+#define CSR_HE_INFO0       0x180
+
+#define DSM_SIZE           0x200000
+#define DSM_POLL_INTERVAL  5  /* ms */
+#define DSM_TIMEOUT        1000  /* ms */
+
+#define NLB_BUF_SIZE  0x400000
+#define TEST_MEM_ALIGN  1024
+
+/*
+ * Value written to CSR_CTL. csr aliases the bit-fields so the whole
+ * 32-bit word can be written at once.
+ */
+struct he_lbk_csr_ctl {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t reset:1;  /* written as 0, then 1, before a run */
+			uint32_t start:1;  /* set to start a test iteration */
+			uint32_t force_completion:1;  /* set to end the test */
+			uint32_t reserved:29;
+		};
+	};
+};
+
+/*
+ * Value written to CSR_CFG. csr aliases the bit-fields so the whole
+ * 32-bit word can be written at once.
+ */
+struct he_lbk_csr_cfg {
+	union {
+		uint32_t csr;
+		struct {
+			uint32_t rsvd1:1;
+			uint32_t cont:1;         /* continuous mode */
+			uint32_t mode:3;         /* NLB_MODE_* value */
+			uint32_t multicl_len:2;  /* encoded cache lines per request */
+			uint32_t rsvd2:13;
+			uint32_t trput_interleave:3;
+			uint32_t test_cfg:5;
+			uint32_t interrupt_on_error:1;
+			uint32_t interrupt_testmode:1;
+			uint32_t rsvd3:2;
+		};
+	};
+};
+
+struct he_lbk_status0 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_writes;
+			uint32_t num_reads;
+		};
+	};
+};
+
+struct he_lbk_status1 {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t num_pend_writes;
+			uint32_t num_pend_reads;
+		};
+	};
+};
+
+/*
+ * Status record read from the start of the DSM buffer after a test
+ * iteration.
+ */
+struct he_lbk_dsm_status {
+	uint32_t test_complete;   /* bit 0 is polled for completion */
+	uint32_t test_error;
+	uint64_t num_clocks;      /* clocks counted for the iteration */
+	uint32_t num_reads;
+	uint32_t num_writes;
+	uint32_t start_overhead;  /* subtracted from num_clocks in reports */
+	uint32_t end_overhead;    /* also subtracted unless in continuous mode */
+};
+
+/* Runtime context of an HE-LBK / HE-MEM-LBK device. */
+struct he_lbk_ctx {
+	uint8_t *addr;       /* register base, taken from dev->addr */
+	uint8_t *dsm_ptr;    /* DSM buffer of DSM_SIZE bytes */
+	uint64_t dsm_iova;   /* IOVA of dsm_ptr */
+	uint8_t *src_ptr;    /* source buffer of NLB_BUF_SIZE bytes */
+	uint64_t src_iova;   /* IOVA of src_ptr */
+	uint8_t *dest_ptr;   /* destination buffer of NLB_BUF_SIZE bytes */
+	uint64_t dest_iova;  /* IOVA of dest_ptr */
+	struct he_lbk_dsm_status *status_ptr;  /* same address as dsm_ptr */
+};
+
+/* Private data of an HE-LBK raw device: stored config and context. */
+struct he_lbk_priv {
+	struct rte_pmd_afu_he_lbk_cfg he_lbk_cfg;
+	struct he_lbk_ctx he_lbk_ctx;
+};
+
+#endif /* _HE_LBK_H_ */
diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
index 8a989e3..a983f53 100644
--- a/drivers/raw/afu_mf/meson.build
+++ b/drivers/raw/afu_mf/meson.build
@@ -2,6 +2,6 @@
 # Copyright 2022 Intel Corporation
 
 deps += ['rawdev', 'bus_pci', 'bus_ifpga']
-sources = files('afu_mf_rawdev.c', 'n3000_afu.c')
+sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c')
 
 headers = files('rte_pmd_afu.h')
diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h
index f14a053..658df55 100644
--- a/drivers/raw/afu_mf/rte_pmd_afu.h
+++ b/drivers/raw/afu_mf/rte_pmd_afu.h
@@ -90,6 +90,20 @@ struct rte_pmd_afu_n3000_cfg {
 	};
 };
 
+/**
+ * HE-LBK & HE-MEM-LBK AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_lbk_cfg {
+	uint32_t mode;
+	uint32_t begin;
+	uint32_t end;
+	uint32_t multi_cl;
+	uint32_t cont;
+	uint32_t timeout;
+	uint32_t trput_interleave;
+	uint32_t freq_mhz;
+};
+
 #ifdef __cplusplus
 }
 #endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v5 4/5] raw/afu_mf: add HE-MEM AFU driver
  2022-05-27  5:36       ` [PATCH v5 0/5] introduce afu_mf raw " Wei Huang
                           ` (2 preceding siblings ...)
  2022-05-27  5:37         ` [PATCH v5 3/5] raw/afu_mf: add HE-LBK " Wei Huang
@ 2022-05-27  5:37         ` Wei Huang
  2022-06-06  6:36           ` Zhang, Tianfei
  2022-05-27  5:37         ` [PATCH v5 5/5] raw/afu_mf: add HE-HSSI " Wei Huang
                           ` (2 subsequent siblings)
  6 siblings, 1 reply; 57+ messages in thread
From: Wei Huang @ 2022-05-27  5:37 UTC (permalink / raw)
  To: dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, rosen.xu, tianfei.zhang, qi.z.zhang, Wei Huang

HE-MEM is one of the host exerciser modules in OFS FPGA,
which is used to test local memory with built-in traffic
generator.
This driver initializes the module and reports the test result.

Signed-off-by: Wei Huang <wei.huang@intel.com>
---
 drivers/raw/afu_mf/afu_mf_rawdev.c |   3 +
 drivers/raw/afu_mf/he_mem.c        | 181 +++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/he_mem.h        |  40 ++++++++
 drivers/raw/afu_mf/meson.build     |   2 +-
 drivers/raw/afu_mf/rte_pmd_afu.h   |   7 ++
 5 files changed, 232 insertions(+), 1 deletion(-)
 create mode 100644 drivers/raw/afu_mf/he_mem.c
 create mode 100644 drivers/raw/afu_mf/he_mem.h

diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c
index e91eb21..a56f60e 100644
--- a/drivers/raw/afu_mf/afu_mf_rawdev.c
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
@@ -21,6 +21,7 @@
 #include "afu_mf_rawdev.h"
 #include "n3000_afu.h"
 #include "he_lbk.h"
+#include "he_mem.h"
 
 #define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
 
@@ -28,6 +29,7 @@
 	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
 	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
 	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
+	{ HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
 	{ 0, 0 /* sentinel */ }
 };
 
@@ -35,6 +37,7 @@
 	&n3000_afu_drv,
 	&he_lbk_drv,
 	&he_mem_lbk_drv,
+	&he_mem_tg_drv,
 	NULL
 };
 
diff --git a/drivers/raw/afu_mf/he_mem.c b/drivers/raw/afu_mf/he_mem.c
new file mode 100644
index 0000000..ccbb3a8
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_mem.h"
+
+static int he_mem_tg_test(struct afu_mf_rawdev *dev)
+{
+	struct he_mem_tg_priv *priv = NULL;
+	struct rte_pmd_afu_he_mem_tg_cfg *cfg = NULL;
+	struct he_mem_tg_ctx *ctx = NULL;
+	uint64_t value = 0x12345678;
+	uint64_t cap = 0;
+	uint64_t channel_mask = 0;
+	int i, t = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_mem_tg_cfg;
+	ctx = &priv->he_mem_tg_ctx;
+
+	AFU_MF_PMD_DEBUG("Channel mask: 0x%x", cfg->channel_mask);
+
+	rte_write64(value, ctx->addr + MEM_TG_SCRATCHPAD);
+	cap = rte_read64(ctx->addr + MEM_TG_SCRATCHPAD);
+	AFU_MF_PMD_DEBUG("Scratchpad value: 0x%"PRIx64, cap);
+	if (cap != value) {
+		AFU_MF_PMD_ERR("Test scratchpad register failed");
+		return -EIO;
+	}
+
+	cap = rte_read64(ctx->addr + MEM_TG_CTRL);
+	AFU_MF_PMD_DEBUG("Capability: 0x%"PRIx64, cap);
+
+	channel_mask = cfg->channel_mask & cap;
+	/* start traffic generators */
+	rte_write64(channel_mask, ctx->addr + MEM_TG_CTRL);
+
+	/* check test status */
+	while (t < MEM_TG_TIMEOUT_MS) {
+		value = rte_read64(ctx->addr + MEM_TG_STAT);
+		for (i = 0; i < NUM_MEM_TG_CHANNELS; i++) {
+			if (channel_mask & (1 << i)) {
+				if (TGACTIVE(value, i))
+					continue;
+				printf("TG channel %d test %s\n", i,
+					TGPASS(value, i) ? "pass" :
+					TGTIMEOUT(value, i) ? "timeout" :
+					TGFAIL(value, i) ? "fail" : "error");
+				channel_mask &= ~(1 << i);
+			}
+		}
+		if (!channel_mask)
+			break;
+		rte_delay_ms(MEM_TG_POLL_INTERVAL_MS);
+		t += MEM_TG_POLL_INTERVAL_MS;
+	}
+
+	if (channel_mask) {
+		AFU_MF_PMD_ERR("Timeout 0x%04lx", (unsigned long)value);
+		return channel_mask;
+	}
+
+	return 0;
+}
+
+static int he_mem_tg_init(struct afu_mf_rawdev *dev)
+{
+	struct he_mem_tg_priv *priv = NULL;
+	struct he_mem_tg_ctx *ctx = NULL;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (!priv) {
+		priv = rte_zmalloc(NULL, sizeof(struct he_mem_tg_priv), 0);
+		if (!priv)
+			return -ENOMEM;
+		dev->priv = priv;
+	}
+
+	ctx = &priv->he_mem_tg_ctx;
+	ctx->addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+static int he_mem_tg_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_mem_tg_priv *priv = NULL;
+
+	if (!dev || !config || !config_size)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_mem_tg_cfg))
+		return -EINVAL;
+
+	rte_memcpy(&priv->he_mem_tg_cfg, config, sizeof(priv->he_mem_tg_cfg));
+
+	return 0;
+}
+
+static int he_mem_tg_close(struct afu_mf_rawdev *dev)
+{
+	if (!dev)
+		return -EINVAL;
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+
+	return 0;
+}
+
+static int he_mem_tg_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_mem_tg_priv *priv = NULL;
+	struct he_mem_tg_ctx *ctx = NULL;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_mem_tg_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (!f)
+		f = stdout;
+
+	ctx = &priv->he_mem_tg_ctx;
+
+	fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
+
+	return 0;
+}
+
+static struct afu_mf_ops he_mem_tg_ops = {
+	.init = he_mem_tg_init,
+	.config = he_mem_tg_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_mem_tg_test,
+	.close = he_mem_tg_close,
+	.dump = he_mem_tg_dump,
+	.reset = NULL
+};
+
+struct afu_mf_drv he_mem_tg_drv = {
+	.uuid = { HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+	.ops = &he_mem_tg_ops
+};
diff --git a/drivers/raw/afu_mf/he_mem.h b/drivers/raw/afu_mf/he_mem.h
new file mode 100644
index 0000000..82404b6
--- /dev/null
+++ b/drivers/raw/afu_mf/he_mem.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_MEM_H_
+#define _HE_MEM_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb
+#define HE_MEM_TG_UUID_H  0x4dadea342c7848cb
+
+#define NUM_MEM_TG_CHANNELS      4
+#define MEM_TG_TIMEOUT_MS     5000
+#define MEM_TG_POLL_INTERVAL_MS 10
+
+extern struct afu_mf_drv he_mem_tg_drv;
+
+/* MEM-TG registers; MEM_TG_STAT has a 4-bit status field per channel n */
+#define MEM_TG_SCRATCHPAD   0x28
+#define MEM_TG_CTRL         0x30
+#define   TGCONTROL(n)      (1 << (n))
+#define MEM_TG_STAT         0x38
+#define   TGSTATUS(v, n)    (((v) >> ((n) << 2)) & 0xf)
+#define   TGPASS(v, n)      (((v) >> (((n) << 2) + 3)) & 0x1)
+#define   TGFAIL(v, n)      (((v) >> (((n) << 2) + 2)) & 0x1)
+#define   TGTIMEOUT(v, n)   (((v) >> (((n) << 2) + 1)) & 0x1)
+#define   TGACTIVE(v, n)    (((v) >> ((n) << 2)) & 0x1)
+
+struct he_mem_tg_ctx {
+	uint8_t *addr;
+};
+
+struct he_mem_tg_priv {
+	struct rte_pmd_afu_he_mem_tg_cfg he_mem_tg_cfg;
+	struct he_mem_tg_ctx he_mem_tg_ctx;
+};
+
+#endif /* _HE_MEM_H_ */
diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
index a983f53..b53a31b 100644
--- a/drivers/raw/afu_mf/meson.build
+++ b/drivers/raw/afu_mf/meson.build
@@ -2,6 +2,6 @@
 # Copyright 2022 Intel Corporation
 
 deps += ['rawdev', 'bus_pci', 'bus_ifpga']
-sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c')
+sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c', 'he_mem.c')
 
 headers = files('rte_pmd_afu.h')
diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h
index 658df55..2f92f7e 100644
--- a/drivers/raw/afu_mf/rte_pmd_afu.h
+++ b/drivers/raw/afu_mf/rte_pmd_afu.h
@@ -104,6 +104,13 @@ struct rte_pmd_afu_he_lbk_cfg {
 	uint32_t freq_mhz;
 };
 
+/**
+ * HE-MEM-TG AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_mem_tg_cfg {
+	uint32_t channel_mask;   /* mask of traffic generator channel */
+};
+
 #ifdef __cplusplus
 }
 #endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v5 5/5] raw/afu_mf: add HE-HSSI AFU driver
  2022-05-27  5:36       ` [PATCH v5 0/5] introduce afu_mf raw " Wei Huang
                           ` (3 preceding siblings ...)
  2022-05-27  5:37         ` [PATCH v5 4/5] raw/afu_mf: add HE-MEM " Wei Huang
@ 2022-05-27  5:37         ` Wei Huang
  2022-06-06  6:39           ` Zhang, Tianfei
  2022-06-06  1:47         ` [PATCH v5 0/5] introduce afu_mf raw device driver Zhang, Tianfei
  2022-06-09  2:44         ` [PATCH v6 0/5] introduce AFU PMD driver of FPGA Wei Huang
  6 siblings, 1 reply; 57+ messages in thread
From: Wei Huang @ 2022-05-27  5:37 UTC (permalink / raw)
  To: dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, rosen.xu, tianfei.zhang, qi.z.zhang, Wei Huang

HE-HSSI is one of the host exerciser modules in OFS FPGA,
which is used to test HSSI (High Speed Serial Interface).
This driver initializes the module and reports the test result.

Signed-off-by: Wei Huang <wei.huang@intel.com>
---
 drivers/raw/afu_mf/afu_mf_rawdev.c |   3 +
 drivers/raw/afu_mf/he_hssi.c       | 369 +++++++++++++++++++++++++++++++++++++
 drivers/raw/afu_mf/he_hssi.h       | 102 ++++++++++
 drivers/raw/afu_mf/meson.build     |   3 +-
 drivers/raw/afu_mf/rte_pmd_afu.h   |  16 ++
 5 files changed, 492 insertions(+), 1 deletion(-)
 create mode 100644 drivers/raw/afu_mf/he_hssi.c
 create mode 100644 drivers/raw/afu_mf/he_hssi.h

diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c b/drivers/raw/afu_mf/afu_mf_rawdev.c
index a56f60e..f24c748 100644
--- a/drivers/raw/afu_mf/afu_mf_rawdev.c
+++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
@@ -22,6 +22,7 @@
 #include "n3000_afu.h"
 #include "he_lbk.h"
 #include "he_mem.h"
+#include "he_hssi.h"
 
 #define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
 
@@ -30,6 +31,7 @@
 	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
 	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
 	{ HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
+	{ HE_HSSI_UUID_L, HE_HSSI_UUID_H },
 	{ 0, 0 /* sentinel */ }
 };
 
@@ -38,6 +40,7 @@
 	&he_lbk_drv,
 	&he_mem_lbk_drv,
 	&he_mem_tg_drv,
+	&he_hssi_drv,
 	NULL
 };
 
diff --git a/drivers/raw/afu_mf/he_hssi.c b/drivers/raw/afu_mf/he_hssi.c
new file mode 100644
index 0000000..bedafbd
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.c
@@ -0,0 +1,369 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_memcpy.h>
+#include <rte_io.h>
+#include <rte_vfio.h>
+#include <rte_bus_pci.h>
+#include <rte_bus_ifpga.h>
+#include <rte_rawdev.h>
+
+#include "afu_mf_rawdev.h"
+#include "he_hssi.h"
+
+/*
+ * Write a 32-bit value to an indirectly addressed HE-HSSI register through
+ * the TRAFFIC_CTRL_DATA / TRAFFIC_CTRL_CMD mailbox.
+ *
+ * Returns 0 on success, -EINVAL if ctx is NULL, or -ETIMEDOUT if the
+ * acknowledge bit is not set (or not cleared again) within
+ * MAILBOX_TIMEOUT_MS.
+ */
+static int he_hssi_indirect_write(struct he_hssi_ctx *ctx, uint32_t addr,
+	uint32_t value)
+{
+	struct traffic_ctrl_cmd cmd;
+	struct traffic_ctrl_data data;
+	uint32_t i = 0;
+
+	AFU_MF_PMD_DEBUG("Indirect write 0x%x, value 0x%08x", addr, value);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* zero the whole CSR image so read_data is not stack garbage */
+	data.csr = 0;
+	data.write_data = value;
+	rte_write64(data.csr, ctx->addr + TRAFFIC_CTRL_DATA);
+
+	cmd.csr = 0;
+	cmd.write_cmd = 1;
+	cmd.afu_cmd_addr = addr;
+	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+	/* wait until the command is acknowledged */
+	while (i < MAILBOX_TIMEOUT_MS) {
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	/* write ack_trans back and wait until it reads as zero */
+	i = 0;
+	cmd.csr = 0;
+	while (i < MAILBOX_TIMEOUT_MS) {
+		cmd.ack_trans = 1;
+		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (!cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int he_hssi_indirect_read(struct he_hssi_ctx *ctx, uint32_t addr,
+	uint32_t *value)
+{
+	struct traffic_ctrl_cmd cmd;
+	struct traffic_ctrl_data data;
+	uint32_t i = 0;
+
+	if (!ctx)
+		return -EINVAL;
+
+	cmd.csr = 0;
+	cmd.read_cmd = 1;
+	cmd.afu_cmd_addr = addr;
+	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+
+	while (i < MAILBOX_TIMEOUT_MS) {
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (cmd.ack_trans) {
+			data.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_DATA);
+			*value = data.read_data;
+			break;
+		}
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	i = 0;
+	cmd.csr = 0;
+	while (i < MAILBOX_TIMEOUT_MS) {
+		cmd.ack_trans = 1;
+		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
+		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
+		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
+		if (!cmd.ack_trans)
+			break;
+		i += MAILBOX_POLL_INTERVAL_MS;
+	}
+	if (i >= MAILBOX_TIMEOUT_MS)
+		return -ETIMEDOUT;
+
+	AFU_MF_PMD_DEBUG("Indirect read 0x%x, value 0x%08x", addr, *value);
+	return 0;
+}
+
+static void he_hssi_report(struct he_hssi_ctx *ctx)
+{
+	uint32_t val = 0;
+	uint64_t v64 = 0;
+	int ret = 0;
+
+	ret = he_hssi_indirect_read(ctx, TM_PKT_GOOD, &val);
+	if (ret)
+		return;
+	printf("Number of good packets received: %u\n", val);
+
+	ret = he_hssi_indirect_read(ctx, TM_PKT_BAD, &val);
+	if (ret)
+		return;
+	printf("Number of bad packets received: %u\n", val);
+
+	ret = he_hssi_indirect_read(ctx, TM_BYTE_CNT1, &val);
+	if (ret)
+		return;
+	v64 = val;
+	ret = he_hssi_indirect_read(ctx, TM_BYTE_CNT0, &val);
+	if (ret)
+		return;
+	v64 = (v64 << 32) | val;
+	printf("Number of bytes received: %"PRIu64"\n", v64);
+
+	ret = he_hssi_indirect_read(ctx, TM_AVST_RX_ERR, &val);
+	if (ret)
+		return;
+	if (val & ERR_VALID) {
+		printf("AVST rx error:");
+		if (val & OVERFLOW_ERR)
+			printf(" overflow");
+		if (val & LENGTH_ERR)
+			printf(" length");
+		if (val & OVERSIZE_ERR)
+			printf(" oversize");
+		if (val & UNDERSIZE_ERR)
+			printf(" undersize");
+		if (val & MAC_CRC_ERR)
+			printf(" crc");
+		if (val & PHY_ERR)
+			printf(" phy");
+		printf("\n");
+	}
+
+	ret = he_hssi_indirect_read(ctx, LOOPBACK_FIFO_STATUS, &val);
+	if (ret)
+		return;
+	if (val & (ALMOST_EMPTY | ALMOST_FULL)) {
+		printf("FIFO status:");
+		if (val & ALMOST_EMPTY)
+			printf(" almost empty");
+		if (val & ALMOST_FULL)
+			printf(" almost full");
+		printf("\n");
+	}
+}
+
+/*
+ * Run the HE-HSSI test described by the stored configuration.
+ *
+ * Stops any running transfer and selects the configured port. When
+ * he_loopback is >= 0 only the HE loopback enable is programmed and the
+ * function returns. Otherwise the traffic generator is programmed and
+ * started, TG_PKT_XFRD is polled once per second for up to cfg->timeout
+ * iterations, and the traffic monitor counters are printed.
+ *
+ * Returns 0 on success, -EINVAL if dev is NULL, -ENOENT if the device has
+ * no private data, or the error code of a failed mailbox access.
+ */
+static int he_hssi_test(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
+	struct he_hssi_ctx *ctx = NULL;
+	struct traffic_ctrl_ch_sel sel;
+	uint32_t val = 0;
+	uint32_t i = 0;
+	int ret = 0;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	cfg = &priv->he_hssi_cfg;
+	ctx = &priv->he_hssi_ctx;
+
+	ret = he_hssi_indirect_write(ctx, TG_STOP_XFR, 0);
+	if (ret)
+		return ret;
+
+	/* zero the CSR image so the reserved bits are not stack garbage */
+	sel.csr = 0;
+	sel.channel_sel = cfg->port;
+	rte_write64(sel.csr, ctx->addr + TRAFFIC_CTRL_CH_SEL);
+
+	if (cfg->he_loopback >= 0) {
+		val = cfg->he_loopback ? 1 : 0;
+		AFU_MF_PMD_INFO("%s HE loopback on port %u",
+			val ? "Enable" : "Disable", cfg->port);
+		return he_hssi_indirect_write(ctx, LOOPBACK_EN, val);
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_NUM_PKT, cfg->num_packets);
+	if (ret)
+		return ret;
+
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN, cfg->packet_length);
+	if (ret)
+		return ret;
+
+	/* MAC addresses are split into a 32-bit low and a 16-bit high part */
+	val = cfg->src_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->src_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->dest_addr & 0xffffffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_L, val);
+	if (ret)
+		return ret;
+	val = (cfg->dest_addr >> 32) & 0xffff;
+	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_H, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_length ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN_TYPE, val);
+	if (ret)
+		return ret;
+
+	val = cfg->random_payload ? 1 : 0;
+	ret = he_hssi_indirect_write(ctx, TG_DATA_PATTERN, val);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < 3; i++) {
+		ret = he_hssi_indirect_write(ctx, TG_RANDOM_SEED(i),
+			cfg->rnd_seed[i]);
+		if (ret)
+			return ret;
+	}
+
+	ret = he_hssi_indirect_write(ctx, TG_START_XFR, 1);
+	if (ret)
+		return ret;
+
+	/* restart the counter: the seed loop above left i at 3 */
+	i = 0;
+	while (i++ < cfg->timeout) {
+		ret = he_hssi_indirect_read(ctx, TG_PKT_XFRD, &val);
+		if (ret)
+			break;
+		if (val == cfg->num_packets)
+			break;
+		sleep(1);
+	}
+
+	he_hssi_report(ctx);
+
+	return ret;
+}
+
+static int he_hssi_init(struct afu_mf_rawdev *dev)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct he_hssi_ctx *ctx = NULL;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv) {
+		priv = rte_zmalloc(NULL, sizeof(struct he_hssi_priv), 0);
+		if (!priv)
+			return -ENOMEM;
+		dev->priv = priv;
+	}
+
+	ctx = &priv->he_hssi_ctx;
+	ctx->addr = (uint8_t *)dev->addr;
+
+	return 0;
+}
+
+static int he_hssi_config(struct afu_mf_rawdev *dev, void *config,
+	size_t config_size)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
+
+	if (!dev || !config || !config_size)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (config_size != sizeof(struct rte_pmd_afu_he_hssi_cfg))
+		return -EINVAL;
+
+	cfg = (struct rte_pmd_afu_he_hssi_cfg *)config;
+	if (cfg->port >= NUM_HE_HSSI_PORTS)
+		return -EINVAL;
+
+	rte_memcpy(&priv->he_hssi_cfg, cfg, sizeof(priv->he_hssi_cfg));
+
+	return 0;
+}
+
+static int he_hssi_close(struct afu_mf_rawdev *dev)
+{
+	if (!dev)
+		return -EINVAL;
+
+	rte_free(dev->priv);
+	dev->priv = NULL;
+
+	return 0;
+}
+
+static int he_hssi_dump(struct afu_mf_rawdev *dev, FILE *f)
+{
+	struct he_hssi_priv *priv = NULL;
+	struct he_hssi_ctx *ctx = NULL;
+
+	if (!dev)
+		return -EINVAL;
+
+	priv = (struct he_hssi_priv *)dev->priv;
+	if (!priv)
+		return -ENOENT;
+
+	if (!f)
+		f = stdout;
+
+	ctx = &priv->he_hssi_ctx;
+
+	fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
+
+	return 0;
+}
+
+static struct afu_mf_ops he_hssi_ops = {
+	.init = he_hssi_init,
+	.config = he_hssi_config,
+	.start = NULL,
+	.stop = NULL,
+	.test = he_hssi_test,
+	.close = he_hssi_close,
+	.dump = he_hssi_dump,
+	.reset = NULL
+};
+
+struct afu_mf_drv he_hssi_drv = {
+	.uuid = { HE_HSSI_UUID_L, HE_HSSI_UUID_H },
+	.ops = &he_hssi_ops
+};
diff --git a/drivers/raw/afu_mf/he_hssi.h b/drivers/raw/afu_mf/he_hssi.h
new file mode 100644
index 0000000..f8b9623
--- /dev/null
+++ b/drivers/raw/afu_mf/he_hssi.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _HE_HSSI_H_
+#define _HE_HSSI_H_
+
+#include "afu_mf_rawdev.h"
+#include "rte_pmd_afu.h"
+
+#define HE_HSSI_UUID_L    0xbb370242ac130002
+#define HE_HSSI_UUID_H    0x823c334c98bf11ea
+#define NUM_HE_HSSI_PORTS 8
+
+extern struct afu_mf_drv he_hssi_drv;
+
+/* HE-HSSI registers definition */
+#define TRAFFIC_CTRL_CMD    0x30
+#define TRAFFIC_CTRL_DATA   0x38
+#define TRAFFIC_CTRL_CH_SEL 0x40
+#define AFU_SCRATCHPAD      0x48
+
+#define TG_NUM_PKT        0x3c00
+#define TG_PKT_LEN_TYPE   0x3c01
+#define TG_DATA_PATTERN   0x3c02
+#define TG_START_XFR      0x3c03
+#define TG_STOP_XFR       0x3c04
+#define TG_SRC_MAC_L      0x3c05
+#define TG_SRC_MAC_H      0x3c06
+#define TG_DST_MAC_L      0x3c07
+#define TG_DST_MAC_H      0x3c08
+#define TG_PKT_XFRD       0x3c09
+#define TG_RANDOM_SEED(n) (0x3c0a + (n))
+#define TG_PKT_LEN        0x3c0d
+
+#define TM_NUM_PKT        0x3d00
+#define TM_PKT_GOOD       0x3d01
+#define TM_PKT_BAD        0x3d02
+#define TM_BYTE_CNT0      0x3d03
+#define TM_BYTE_CNT1      0x3d04
+#define TM_AVST_RX_ERR    0x3d07
+#define   OVERFLOW_ERR    (1 << 9)
+#define   LENGTH_ERR      (1 << 8)
+#define   OVERSIZE_ERR    (1 << 7)
+#define   UNDERSIZE_ERR   (1 << 6)
+#define   MAC_CRC_ERR     (1 << 5)
+#define   PHY_ERR         (1 << 4)
+#define   ERR_VALID       (1 << 3)
+
+#define LOOPBACK_EN          0x3e00
+#define LOOPBACK_FIFO_STATUS 0x3e01
+#define   ALMOST_EMPTY    (1 << 1)
+#define   ALMOST_FULL     (1 << 0)
+
+#define MAILBOX_TIMEOUT_MS       100
+#define MAILBOX_POLL_INTERVAL_MS 10
+
+struct traffic_ctrl_cmd {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t read_cmd:1;
+			uint32_t write_cmd:1;
+			uint32_t ack_trans:1;
+			uint32_t rsvd1:29;
+			uint32_t afu_cmd_addr:16;
+			uint32_t rsvd2:16;
+		};
+	};
+};
+
+struct traffic_ctrl_data {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t read_data;
+			uint32_t write_data;
+		};
+	};
+};
+
+struct traffic_ctrl_ch_sel {
+	union {
+		uint64_t csr;
+		struct {
+			uint32_t channel_sel:3;
+			uint32_t rsvd1:29;
+			uint32_t rsvd2;
+		};
+	};
+};
+
+struct he_hssi_ctx {
+	uint8_t *addr;
+};
+
+struct he_hssi_priv {
+	struct rte_pmd_afu_he_hssi_cfg he_hssi_cfg;
+	struct he_hssi_ctx he_hssi_ctx;
+};
+
+#endif /* _HE_HSSI_H_ */
diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
index b53a31b..f304bc8 100644
--- a/drivers/raw/afu_mf/meson.build
+++ b/drivers/raw/afu_mf/meson.build
@@ -2,6 +2,7 @@
 # Copyright 2022 Intel Corporation
 
 deps += ['rawdev', 'bus_pci', 'bus_ifpga']
-sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c', 'he_mem.c')
+sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c', 'he_mem.c',
+	'he_hssi.c')
 
 headers = files('rte_pmd_afu.h')
diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h b/drivers/raw/afu_mf/rte_pmd_afu.h
index 2f92f7e..89d866a 100644
--- a/drivers/raw/afu_mf/rte_pmd_afu.h
+++ b/drivers/raw/afu_mf/rte_pmd_afu.h
@@ -111,6 +111,22 @@ struct rte_pmd_afu_he_mem_tg_cfg {
 	uint32_t channel_mask;   /* mask of traffic generator channel */
 };
 
+/**
+ * HE-HSSI AFU configuration data structure.
+ */
+struct rte_pmd_afu_he_hssi_cfg {
+	uint32_t port;           /* channel to select, < NUM_HE_HSSI_PORTS */
+	uint32_t timeout;        /* upper bound on 1-second completion polls */
+	uint32_t num_packets;    /* written to TG_NUM_PKT */
+	uint32_t random_length;  /* non-zero sets TG_PKT_LEN_TYPE to 1 */
+	uint32_t packet_length;  /* written to TG_PKT_LEN */
+	uint32_t random_payload; /* non-zero sets TG_DATA_PATTERN to 1 */
+	uint32_t rnd_seed[3];    /* written to TG_RANDOM_SEED(0..2) */
+	uint64_t src_addr;       /* TG_SRC_MAC_L/H, only low 48 bits used */
+	uint64_t dest_addr;      /* TG_DST_MAC_L/H, only low 48 bits used */
+	int he_loopback;         /* >= 0: only set loopback (0 off, else on) */
+};
+
 #ifdef __cplusplus
 }
 #endif
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 2/5] raw/afu_mf: add N3000 AFU driver
  2022-05-27  5:37         ` [PATCH v5 2/5] raw/afu_mf: add N3000 AFU driver Wei Huang
@ 2022-06-06  1:38           ` Zhang, Tianfei
  2022-06-07  2:40             ` Huang, Wei
  0 siblings, 1 reply; 57+ messages in thread
From: Zhang, Tianfei @ 2022-06-06  1:38 UTC (permalink / raw)
  To: Huang, Wei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Huang, Wei <wei.huang@intel.com>
> Sent: Friday, May 27, 2022 1:37 PM
> To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang, Wei
> <wei.huang@intel.com>
> Subject: [PATCH v5 2/5] raw/afu_mf: add N3000 AFU driver
> 
> N3000 AFU includes NLB0 and DMA modules, NLB0 is used to test PCI bus
> and DMA is used to test local memory.
> This driver initialize the modules and report test result.
> 
> Signed-off-by: Wei Huang <wei.huang@intel.com>
> ---
>  drivers/raw/afu_mf/afu_mf_rawdev.c |    4 +
>  drivers/raw/afu_mf/afu_mf_rawdev.h |   18 +
>  drivers/raw/afu_mf/meson.build     |    4 +-
>  drivers/raw/afu_mf/n3000_afu.c     | 2005
> ++++++++++++++++++++++++++++++++++++
>  drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
>  drivers/raw/afu_mf/rte_pmd_afu.h   |   97 ++
>  6 files changed, 2460 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/raw/afu_mf/n3000_afu.c
>  create mode 100644 drivers/raw/afu_mf/n3000_afu.h
>  create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h
> 
> diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c
> b/drivers/raw/afu_mf/afu_mf_rawdev.c
> index 5be372a..7c18f3b 100644
> --- a/drivers/raw/afu_mf/afu_mf_rawdev.c
> +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
> @@ -17,15 +17,19 @@
>  #include <rte_memzone.h>
>  #include <rte_rawdev_pmd.h>
> 
> +#include "rte_pmd_afu.h"
>  #include "afu_mf_rawdev.h"
> +#include "n3000_afu.h"
> 
>  #define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
> 
>  static const struct rte_afu_uuid afu_uuid_map[] = {
> +	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
>  	{ 0, 0 /* sentinel */ }
>  };
> 
>  static struct afu_mf_drv *afu_table[] = {
> +	&n3000_afu_drv,
>  	NULL
>  };
> 
> diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h
> b/drivers/raw/afu_mf/afu_mf_rawdev.h
> index df6715c..5a66f6c 100644
> --- a/drivers/raw/afu_mf/afu_mf_rawdev.h
> +++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
> @@ -30,6 +30,24 @@
>  #define AFU_MF_PMD_WARN(fmt, args...) \
>  	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
> 
> +#define CLS_TO_SIZE(n)  ((n) << 6)  /* get size of n cache lines */
> +#define SIZE_TO_CLS(s)  ((s) >> 6)  /* convert size to number of cache lines */
> +#define MHZ(f)  ((f) * 1000000)
> +
> +#define dsm_poll_timeout(addr, val, cond, invl, timeout) \
> +({                                                       \
> +	uint64_t __wait = 0;                                 \
> +	uint64_t __invl = (invl);                            \
> +	uint64_t __timeout = (timeout);                      \
> +	for (; __wait <= __timeout; __wait += __invl) {      \
> +		(val) = *(addr);                                 \
> +		if (cond)                                        \
> +			break;                                       \
> +		rte_delay_ms(__invl);                            \
> +	}                                                    \
> +	(cond) ? 0 : 1;                                      \
> +})

Dsm means DMA?

> +
>  struct afu_mf_rawdev;
> 
>  struct afu_mf_ops {
> diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
> index 80526a2..8a989e3 100644
> --- a/drivers/raw/afu_mf/meson.build
> +++ b/drivers/raw/afu_mf/meson.build
> @@ -2,4 +2,6 @@
>  # Copyright 2022 Intel Corporation
> 
>  deps += ['rawdev', 'bus_pci', 'bus_ifpga']
> -sources = files('afu_mf_rawdev.c')
> +sources = files('afu_mf_rawdev.c', 'n3000_afu.c')
> +
> +headers = files('rte_pmd_afu.h')
> diff --git a/drivers/raw/afu_mf/n3000_afu.c b/drivers/raw/afu_mf/n3000_afu.c
> new file mode 100644
> index 0000000..19d7c54
> --- /dev/null
> +++ b/drivers/raw/afu_mf/n3000_afu.c
> @@ -0,0 +1,2005 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memcpy.h>
> +#include <rte_io.h>
> +#include <rte_vfio.h>
> +#include <rte_bus_pci.h>
> +#include <rte_bus_ifpga.h>
> +#include <rte_rawdev.h>
> +
> +#include "afu_mf_rawdev.h"
> +#include "n3000_afu.h"
> +
> +static int nlb_afu_config(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
> +	struct nlb_csr_cfg v;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv)
> +		return -ENOENT;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	cfg = &priv->nlb_cfg;
> +
> +	v.csr = 0;
> +
> +	if (cfg->cont)
> +		v.cont = 1;
> +
> +	if (cfg->cache_policy == NLB_WRPUSH_I)
> +		v.wrpush_i = 1;
> +	else
> +		v.wrthru_en = cfg->cache_policy;
> +
> +	if (cfg->cache_hint == NLB_RDLINE_MIXED)
> +		v.rdsel = 3;
> +	else
> +		v.rdsel = cfg->cache_hint;
> +
> +	v.mode = cfg->mode;
> +	v.chsel = cfg->read_vc;
> +	v.wr_chsel = cfg->write_vc;
> +	v.wrfence_chsel = cfg->wrfence_vc;
> +	v.wrthru_en = cfg->cache_policy;
> +	v.multicl_len = cfg->multi_cl - 1;
> +
> +	AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr);
> +	rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
> +
> +	return 0;
> +}
> +
> +static void nlb_afu_report(struct afu_mf_rawdev *dev, uint32_t cl)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
> +	struct nlb_dsm_status *stat = NULL;
> +	uint64_t ticks = 0;
> +	double num, rd_bw, wr_bw;
> +
> +	if (!dev || !dev->priv)
> +		return;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +
> +	cfg = &priv->nlb_cfg;
> +	stat = priv->nlb_ctx.status_ptr;
> +
> +	if (cfg->cont)
> +		ticks = stat->num_clocks - stat->start_overhead;
> +	else
> +		ticks = stat->num_clocks -
> +			(stat->start_overhead + stat->end_overhead);
> +
> +	if (cfg->freq_mhz == 0)
> +		cfg->freq_mhz = 200;
> +
> +	num = (double)stat->num_reads;
> +	rd_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
> +	num = (double)stat->num_writes;
> +	wr_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
> +
> +	printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
> +		"Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
> +	printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n",
> +		cl, stat->num_reads, stat->num_writes, ticks,
> +		rd_bw / 1e9, wr_bw / 1e9);
> +}
> +
> +static int nlb_afu_test(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct nlb_afu_ctx *ctx = NULL;
> +	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
> +	struct nlb_csr_ctl ctl;
> +	uint32_t *ptr = NULL;
> +	uint32_t i, j, cl, val = 0;
> +	uint64_t sval = 0;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv)
> +		return -ENOENT;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	ctx = &priv->nlb_ctx;
> +	cfg = &priv->nlb_cfg;
> +
> +	/* initialize registers */
> +	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
> +	rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
> +
> +	ctl.csr = 0;
> +	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +	ctl.reset = 1;
> +	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
> +	rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
> +	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
> +	rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
> +
> +	ret = nlb_afu_config(dev);
> +	if (ret)
> +		return ret;
> +
> +	/* initialize src data */
> +	ptr = (uint32_t *)ctx->src_ptr;
> +	j = CLS_TO_SIZE(cfg->end) >> 2;
> +	for (i = 0; i < j; i++)
> +		*ptr++ = i;
> +
> +	/* start test */
> +	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
> +		memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
> +		memset(ctx->dsm_ptr, 0, DSM_SIZE);
> +
> +		ctl.csr = 0;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +		ctl.reset = 1;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +		rte_write32(cl, ctx->addr + CSR_NUM_LINES);
> +
> +		rte_delay_us(10);
> +
> +		ctl.start = 1;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +		if (cfg->cont) {
> +			rte_delay_ms(cfg->timeout * 1000);
> +			ctl.force_completion = 1;
> +			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +			ret = dsm_poll_timeout(&ctx->status_ptr-
> >test_complete,
> +				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
> +				DSM_TIMEOUT);
> +			if (ret) {
> +				printf("DSM poll timeout\n");
> +				goto end;
> +			}
> +		} else {
> +			ret = dsm_poll_timeout(&ctx->status_ptr-
> >test_complete,
> +				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
> +				DSM_TIMEOUT);
> +			if (ret) {
> +				printf("DSM poll timeout\n");
> +				goto end;
> +			}
> +			ctl.force_completion = 1;
> +			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +		}
> +
> +		nlb_afu_report(dev, cl);
> +
> +		i = 0;
> +		while (i++ < 100) {
> +			sval = rte_read64(ctx->addr + CSR_STATUS1);
> +			if (sval == 0)
> +				break;
> +			rte_delay_us(1000);
> +		}
> +
> +		ptr = (uint32_t *)ctx->dest_ptr;
> +		j = CLS_TO_SIZE(cl) >> 2;
> +		for (i = 0; i < j; i++) {
> +			if (*ptr++ != i) {
> +				AFU_MF_PMD_ERR("Data mismatch @ %u", i);
> +				break;
> +			}
> +		}
> +	}
> +
> +end:
> +	return ret;
> +}
> +
> +static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
> +{
> +	int i = 0;
> +
> +	if (!ctx)
> +		return;
> +
> +	for (i = 0; i < NUM_DMA_BUF; i++) {
> +		rte_free(ctx->dma_buf[i]);
> +		ctx->dma_buf[i] = NULL;
> +	}
> +
> +	rte_free(ctx->data_buf);
> +	ctx->data_buf = NULL;
> +
> +	rte_free(ctx->ref_buf);
> +	ctx->ref_buf = NULL;
> +}
> +
> +static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
> +	struct rte_pmd_afu_dma_cfg *cfg)
> +{
> +	size_t page_sz = sysconf(_SC_PAGE_SIZE);
> +	int i, ret = 0;
> +
> +	if (!ctx || !cfg)
> +		return -EINVAL;
> +
> +	for (i = 0; i < NUM_DMA_BUF; i++) {
> +		ctx->dma_buf[i] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
> +			TEST_MEM_ALIGN);
> +		if (!ctx->dma_buf[i]) {
> +			ret = -ENOMEM;
> +			goto free;
> +		}
> +		ctx->dma_iova[i] = rte_malloc_virt2iova(ctx->dma_buf[i]);
> +		if (ctx->dma_iova[i] == RTE_BAD_IOVA) {
> +			ret = -ENOMEM;
> +			goto free;
> +		}
> +	}
> +
> +	ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz);
> +	if (!ctx->data_buf) {
> +		ret = -ENOMEM;
> +		goto free;
> +	}
> +
> +	ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz);
> +	if (!ctx->ref_buf) {
> +		ret = -ENOMEM;
> +		goto free;
> +	}

Suppose the ctx->ref_buf allocation fails: will dma_afu_buf_free() still work correctly?

> +
> +	return 0;
> +
> +free:
> +	dma_afu_buf_free(ctx);
> +	return ret;
> +}
> +
> +static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
> +{
> +	int *ptr = NULL;
> +	size_t i = 0;
> +	size_t dword_size = 0;
> +
> +	if (!ctx || !size)
> +		return;
> +
> +	ptr = (int *)ctx->ref_buf;
> +
> +	if (ctx->pattern) {
> +		memset(ptr, ctx->pattern, size);
> +	} else {
> +		srand(99);
> +		dword_size = size >> 2;
> +		for (i = 0; i < dword_size; i++)
> +			*ptr++ = rand();
> +	}
> +	rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
> +}
> +
> +static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
> +{
> +	uint8_t *src = NULL;
> +	uint8_t *dst = NULL;
> +	size_t i = 0;
> +	int n = 0;
> +
> +	if (!ctx || !size)
> +		return -EINVAL;
> +
> +	src = (uint8_t *)ctx->ref_buf;
> +	dst = (uint8_t *)ctx->data_buf;
> +
> +	if (memcmp(src, dst, size)) {
> +		printf("Transfer is corrupted\n");
> +		if (ctx->verbose) {
> +			for (i = 0; i < size; i++) {
> +				if (*src != *dst) {
> +					if (++n >= ERR_CHECK_LIMIT)
> +						break;
> +					printf("Mismatch at 0x%zx, "
> +						"Expected %02x  Actual
> %02x\n",
> +						i, *src, *dst);
> +				}
> +				src++;
> +				dst++;
> +			}
> +			if (n < ERR_CHECK_LIMIT) {
> +				printf("Found %d error bytes\n", n);
> +			} else {
> +				printf("......\n");
> +				printf("Found more than %d error bytes\n", n);
> +			}
> +		}
> +		return -1;
> +	}
> +
> +	printf("Transfer is verified\n");
> +	return 0;
> +}
> +
> +static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t
> bytes)
> +{
> +	uint64_t qwords = bytes / sizeof(uint64_t);
> +
> +	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
> +		!IS_ALIGNED_QWORD((uint64_t)bytes))
> +		return;
> +
> +	for (; qwords > 0; qwords--, host_addr++, dev_addr++)
> +		rte_write64(*host_addr, dev_addr);
> +}
> +
> +static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t
> bytes)
> +{
> +	uint64_t qwords = bytes / sizeof(uint64_t);
> +
> +	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
> +		!IS_ALIGNED_QWORD((uint64_t)bytes))
> +		return;
> +
> +	for (; qwords > 0; qwords--, host_addr++, dev_addr++)
> +		*host_addr = rte_read64(dev_addr);
> +}
> +
> +static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
> +{
> +	uint64_t requested_page = addr & ~DMA_ASE_WINDOW_MASK;
> +
> +	if (!ctx)
> +		return;
> +
> +	if (requested_page != ctx->cur_ase_page) {
> +		rte_write64(requested_page, ctx->ase_ctrl_addr);
> +		ctx->cur_ase_page = requested_page;
> +	}
> +}
> +
> +static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
> +	uint64_t host_addr, uint32_t count)
> +{
> +	uint64_t dev_aligned_addr = 0;
> +	uint64_t shift = 0;
> +	uint64_t val = 0;
> +	uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)",
> host_addr,
> +		dev_addr, count);
> +
> +	if (!ctx || (count >= QWORD_BYTES))
> +		return -EINVAL;
> +
> +	if (!count)
> +		return 0;
> +
> +	switch_ase_page(ctx, dev_addr);
> +
> +	shift = dev_addr % QWORD_BYTES;
> +	dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
> +	val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
> +	rte_memcpy(((char *)(&val)) + shift, (void *)addr, count);
> +
> +	/* write back to device */
> +	rte_write64(val, ctx->ase_data_addr + dev_aligned_addr);
> +
> +	return 0;
> +}
> +
> +static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
> +	uint64_t *src_ptr, uint64_t *count)
> +{
> +	uint64_t src = *src_ptr;
> +	uint64_t dst = *dst_ptr;
> +	uint64_t align_bytes = *count;
> +	uint64_t offset = 0;
> +	uint64_t left_in_page = DMA_ASE_WINDOW;
> +	uint64_t size_to_copy = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")",
> src, dst,
> +		align_bytes);
> +
> +	if (!ctx || !IS_ALIGNED_DWORD(dst))
> +		return -EINVAL;
> +
> +	if (align_bytes < DWORD_BYTES)
> +		return 0;
> +
> +	if (!IS_ALIGNED_QWORD(dst)) {
> +		/* Write out a single DWORD to get QWORD aligned */
> +		switch_ase_page(ctx, dst);
> +		offset = dst & DMA_ASE_WINDOW_MASK;
> +
> +		rte_write32(*(uint32_t *)(uintptr_t)src,
> +			ctx->ase_data_addr + offset);
> +		src += DWORD_BYTES;
> +		dst += DWORD_BYTES;
> +		align_bytes -= DWORD_BYTES;
> +	}
> +
> +	if (!align_bytes)
> +		return 0;
> +
> +	/* Write out blocks of 64-bit values */
> +	while (align_bytes >= QWORD_BYTES) {
> +		left_in_page -= dst & DMA_ASE_WINDOW_MASK;
> +		size_to_copy =
> +			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES -
> 1)));
> +		if (size_to_copy < QWORD_BYTES)
> +			break;
> +		switch_ase_page(ctx, dst);
> +		offset = dst & DMA_ASE_WINDOW_MASK;
> +		blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
> +			(uint64_t *)(uintptr_t)src, size_to_copy);
> +		src += size_to_copy;
> +		dst += size_to_copy;
> +		align_bytes -= size_to_copy;
> +	}
> +
> +	if (align_bytes >= DWORD_BYTES) {
> +		/* Write out remaining DWORD */
> +		switch_ase_page(ctx, dst);
> +		offset = dst & DMA_ASE_WINDOW_MASK;
> +		rte_write32(*(uint32_t *)(uintptr_t)src,
> +			ctx->ase_data_addr + offset);
> +		src += DWORD_BYTES;
> +		dst += DWORD_BYTES;
> +		align_bytes -= DWORD_BYTES;
> +	}
> +
> +	*src_ptr = src;
> +	*dst_ptr = dst;
> +	*count = align_bytes;
> +
> +	return 0;
> +}
> +
> +static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
> +	uint64_t *src_ptr, uint64_t count)
> +{
> +	uint64_t dst = *dst_ptr;
> +	uint64_t src = *src_ptr;
> +	uint64_t count_left = count;
> +	uint64_t unaligned_size = 0;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")",
> src, dst,
> +		count);
> +
> +	/* aligns address to 8 byte using dst masking method */
> +	if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) {
> +		unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
> +		if (unaligned_size > count_left)
> +			unaligned_size = count_left;
> +		ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
> +		if (ret)
> +			return ret;
> +		count_left -= unaligned_size;
> +		src += unaligned_size;
> +		dst += unaligned_size;
> +	}
> +
> +	/* Handles 8/4 byte MMIO transfer */
> +	ret = ase_write(ctx, &dst, &src, &count_left);
> +	if (ret)
> +		return ret;
> +
> +	/* Left over unaligned bytes transferred using dst masking method */
> +	unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
> +	if (unaligned_size > count_left)
> +		unaligned_size = count_left;
> +
> +	ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
> +	if (ret)
> +		return ret;
> +
> +	count_left -= unaligned_size;
> +	*dst_ptr = dst + unaligned_size;
> +	*src_ptr = src + unaligned_size;
> +
> +	return 0;
> +}
> +
> +static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
> +	uint64_t host_addr, uint32_t count)
> +{
> +	uint64_t dev_aligned_addr = 0;
> +	uint64_t shift = 0;
> +	uint64_t val = 0;
> +	uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)",
> host_addr,
> +		dev_addr, count);
> +
> +	if (!ctx || (count >= QWORD_BYTES))
> +		return -EINVAL;
> +
> +	if (!count)
> +		return 0;
> +
> +	switch_ase_page(ctx, dev_addr);
> +
> +	shift = dev_addr % QWORD_BYTES;
> +	dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
> +	val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
> +	rte_memcpy((void *)addr, ((char *)(&val)) + shift, count);
> +
> +	return 0;
> +}
> +
> +static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
> +	uint64_t *dst_ptr, uint64_t *count)
> +{
> +	uint64_t src = *src_ptr;
> +	uint64_t dst = *dst_ptr;
> +	uint64_t align_bytes = *count;
> +	uint64_t offset = 0;
> +	uint64_t left_in_page = DMA_ASE_WINDOW;
> +	uint64_t size_to_copy = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")",
> dst, src,
> +		align_bytes);
> +
> +	if (!ctx || !IS_ALIGNED_DWORD(src))
> +		return -EINVAL;
> +
> +	if (align_bytes < DWORD_BYTES)
> +		return 0;
> +
> +	if (!IS_ALIGNED_QWORD(src)) {
> +		/* Read a single DWORD to get QWORD aligned */
> +		switch_ase_page(ctx, src);
> +		offset = src & DMA_ASE_WINDOW_MASK;
> +		*(uint32_t *)(uintptr_t)dst =
> +			rte_read32(ctx->ase_data_addr + offset);
> +		src += DWORD_BYTES;
> +		dst += DWORD_BYTES;
> +		align_bytes -= DWORD_BYTES;
> +	}
> +
> +	if (!align_bytes)
> +		return 0;
> +
> +	/* Read blocks of 64-bit values */
> +	while (align_bytes >= QWORD_BYTES) {
> +		left_in_page -= src & DMA_ASE_WINDOW_MASK;
> +		size_to_copy =
> +			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES -
> 1)));
> +		if (size_to_copy < QWORD_BYTES)
> +			break;
> +		switch_ase_page(ctx, src);
> +		offset = src & DMA_ASE_WINDOW_MASK;
> +		blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
> +			(uint64_t *)(uintptr_t)dst, size_to_copy);
> +		src += size_to_copy;
> +		dst += size_to_copy;
> +		align_bytes -= size_to_copy;
> +	}
> +
> +	if (align_bytes >= DWORD_BYTES) {
> +		/* Read remaining DWORD */
> +		switch_ase_page(ctx, src);
> +		offset = src & DMA_ASE_WINDOW_MASK;
> +		*(uint32_t *)(uintptr_t)dst =
> +			rte_read32(ctx->ase_data_addr + offset);
> +		src += DWORD_BYTES;
> +		dst += DWORD_BYTES;
> +		align_bytes -= DWORD_BYTES;
> +	}
> +
> +	*src_ptr = src;
> +	*dst_ptr = dst;
> +	*count = align_bytes;
> +
> +	return 0;
> +}
> +
> +static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
> +	uint64_t *dst_ptr, uint64_t count)
> +{
> +	uint64_t src = *src_ptr;
> +	uint64_t dst = *dst_ptr;
> +	uint64_t count_left = count;
> +	uint64_t unaligned_size = 0;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")",
> src, dst,
> +		count);
> +
> +	/* Aligns address to 8 byte using src masking method */
> +	if (!IS_ALIGNED_DWORD(src) && !IS_ALIGNED_QWORD(src)) {
> +		unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
> +		if (unaligned_size > count_left)
> +			unaligned_size = count_left;
> +		ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
> +		if (ret)
> +			return ret;
> +		count_left -= unaligned_size;
> +		dst += unaligned_size;
> +		src += unaligned_size;
> +	}
> +
> +	/* Handles 8/4 byte MMIO transfer */
> +	ret = ase_read(ctx, &src, &dst, &count_left);
> +	if (ret)
> +		return ret;
> +
> +	/* Left over unaligned bytes transferred using src masking method */
> +	unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
> +	if (unaligned_size > count_left)
> +		unaligned_size = count_left;
> +
> +	ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
> +	if (ret)
> +		return ret;
> +
> +	count_left -= unaligned_size;
> +	*dst_ptr = dst + unaligned_size;
> +	*src_ptr = src + unaligned_size;
> +
> +	return 0;
> +}
> +
> +static void clear_interrupt(struct dma_afu_ctx *ctx)
> +{
> +	/* clear interrupt by writing 1 to IRQ bit in status register */
> +	msgdma_status status;
> +
> +	if (!ctx)
> +		return;
> +
> +	status.csr = 0;
> +	status.irq = 1;
> +	rte_write32(status.csr, CSR_STATUS(ctx->csr_addr));
> +}
> +
> +static int poll_interrupt(struct dma_afu_ctx *ctx)
> +{
> +	struct pollfd pfd = {0};
> +	uint64_t count = 0;
> +	ssize_t bytes_read = 0;
> +	int poll_ret = 0;
> +	int ret = 0;
> +
> +	if (!ctx || (ctx->event_fd < 0))
> +		return -EINVAL;
> +
> +	pfd.fd = ctx->event_fd;
> +	pfd.events = POLLIN;
> +	poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
> +	if (poll_ret < 0) {
> +		AFU_MF_PMD_ERR("Error %s", strerror(errno));
> +		ret = -EFAULT;
> +		goto out;
> +	} else if (poll_ret == 0) {
> +		AFU_MF_PMD_ERR("Timeout");
> +		ret = -ETIMEDOUT;
> +	} else {
> +		bytes_read = read(pfd.fd, &count, sizeof(count));
> +		if (bytes_read > 0) {
> +			if (ctx->verbose)
> +				AFU_MF_PMD_DEBUG("Successful, ret %d, cnt
> %"PRIu64,
> +					poll_ret, count);
> +			ret = 0;
> +		} else {
> +			AFU_MF_PMD_ERR("Failed %s", bytes_read > 0 ?
> +				strerror(errno) : "zero bytes read");
> +			ret = -EIO;
> +		}
> +	}
> +out:
> +	clear_interrupt(ctx);
> +	return ret;
> +}
> +
> +static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc)
> +{
> +	msgdma_status status;
> +	uint64_t fpga_queue_full = 0;
> +
> +	if (!ctx)
> +		return;
> +
> +	if (ctx->verbose) {
> +		AFU_MF_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
> +			desc->rd_address_ext, desc->rd_address);
> +		AFU_MF_PMD_DEBUG("descriptor.wr_address = 0x%x%08x",
> +			desc->wr_address_ext, desc->wr_address);
> +		AFU_MF_PMD_DEBUG("descriptor.len = %u", desc->len);
> +		AFU_MF_PMD_DEBUG("descriptor.wr_burst_count = %u",
> +			desc->wr_burst_count);
> +		AFU_MF_PMD_DEBUG("descriptor.rd_burst_count = %u",
> +			desc->rd_burst_count);
> +		AFU_MF_PMD_DEBUG("descriptor.wr_stride %u", desc-
> >wr_stride);
> +		AFU_MF_PMD_DEBUG("descriptor.rd_stride %u", desc-
> >rd_stride);
> +	}
> +
> +	do {
> +		status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
> +		if (fpga_queue_full++ > 100000000) {
> +			AFU_MF_PMD_DEBUG("DMA queue full retry");
> +			fpga_queue_full = 0;
> +		}
> +	} while (status.desc_buf_full);
> +
> +	blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
> +		sizeof(*desc));
> +}
> +
> +static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
> +	int count, int is_last_desc, fpga_dma_type type, int intr_en)
> +{
> +	msgdma_ext_desc *desc = NULL;
> +	int alignment_offset = 0;
> +	int segment_size = 0;
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	/* src, dst and count must be 64-byte aligned */
> +	if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
> +		!IS_DMA_ALIGNED(count))
> +		return -EINVAL;
> +	memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
> +
> +	/* these fields are fixed for all DMA transfers */
> +	desc = ctx->desc_buf;
> +	desc->seq_num = 0;
> +	desc->wr_stride = 1;
> +	desc->rd_stride = 1;
> +	desc->control.go = 1;
> +	if (intr_en)
> +		desc->control.transfer_irq_en = 1;
> +	else
> +		desc->control.transfer_irq_en = 0;
> +
> +	if (!is_last_desc)
> +		desc->control.early_done_en = 1;
> +	else
> +		desc->control.early_done_en = 0;
> +
> +	if (type == FPGA_TO_FPGA) {
> +		desc->rd_address = src & DMA_MASK_32_BIT;
> +		desc->wr_address = dst & DMA_MASK_32_BIT;
> +		desc->len = count;
> +		desc->wr_burst_count = 4;
> +		desc->rd_burst_count = 4;
> +		desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
> +		desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
> +		send_descriptor(ctx, desc);
> +	} else {
> +		/* check CCIP (host) address is aligned to 4CL (256B) */
> +		alignment_offset = (type == HOST_TO_FPGA)
> +			? (src % CCIP_ALIGN_BYTES) : (dst %
> CCIP_ALIGN_BYTES);
> +		/* performing a short transfer to get aligned */
> +		if (alignment_offset != 0) {
> +			desc->rd_address = src & DMA_MASK_32_BIT;
> +			desc->wr_address = dst & DMA_MASK_32_BIT;
> +			desc->wr_burst_count = 1;
> +			desc->rd_burst_count = 1;
> +			desc->rd_address_ext = (src >> 32) &
> DMA_MASK_32_BIT;
> +			desc->wr_address_ext = (dst >> 32) &
> DMA_MASK_32_BIT;
> +			/* count isn't large enough to hit next 4CL boundary */
> +			if ((CCIP_ALIGN_BYTES - alignment_offset) >= count) {
> +				segment_size = count;
> +				count = 0;
> +			} else {
> +				segment_size = CCIP_ALIGN_BYTES
> +					- alignment_offset;
> +				src += segment_size;
> +				dst += segment_size;
> +				count -= segment_size;
> +				desc->control.transfer_irq_en = 0;
> +			}
> +			/* post short transfer to align to a 4CL (256 byte) */
> +			desc->len = segment_size;
> +			send_descriptor(ctx, desc);
> +		}
> +		/* at this point we are 4CL (256 byte) aligned */
> +		if (count >= CCIP_ALIGN_BYTES) {
> +			desc->rd_address = src & DMA_MASK_32_BIT;
> +			desc->wr_address = dst & DMA_MASK_32_BIT;
> +			desc->wr_burst_count = 4;
> +			desc->rd_burst_count = 4;
> +			desc->rd_address_ext = (src >> 32) &
> DMA_MASK_32_BIT;
> +			desc->wr_address_ext = (dst >> 32) &
> DMA_MASK_32_BIT;
> +			/* buffer ends on 4CL boundary */
> +			if ((count % CCIP_ALIGN_BYTES) == 0) {
> +				segment_size = count;
> +				count = 0;
> +			} else {
> +				segment_size = count
> +					- (count % CCIP_ALIGN_BYTES);
> +				src += segment_size;
> +				dst += segment_size;
> +				count -= segment_size;
> +				desc->control.transfer_irq_en = 0;
> +			}
> +			desc->len = segment_size;
> +			send_descriptor(ctx, desc);
> +		}
> +		/* post short transfer to handle the remainder */
> +		if (count > 0) {
> +			desc->rd_address = src & DMA_MASK_32_BIT;
> +			desc->wr_address = dst & DMA_MASK_32_BIT;
> +			desc->len = count;
> +			desc->wr_burst_count = 1;
> +			desc->rd_burst_count = 1;
> +			desc->rd_address_ext = (src >> 32) &
> DMA_MASK_32_BIT;
> +			desc->wr_address_ext = (dst >> 32) &
> DMA_MASK_32_BIT;
> +			if (intr_en)
> +				desc->control.transfer_irq_en = 1;
> +			send_descriptor(ctx, desc);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int issue_magic(struct dma_afu_ctx *ctx)
> +{
> +	*(ctx->magic_buf) = 0ULL;
> +	return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
> +		DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
> +}
> +
> +static void wait_magic(struct dma_afu_ctx *ctx)
> +{
> +	int magic_timeout = 0;
> +
> +	if (!ctx)
> +		return;
> +
> +	poll_interrupt(ctx);
> +	while (*(ctx->magic_buf) != DMA_WF_MAGIC) {
> +		if (magic_timeout++ > 1000) {
> +			AFU_MF_PMD_ERR("DMA magic operation timeout");
> +			magic_timeout = 0;
> +			break;
> +		}
> +	}
> +	*(ctx->magic_buf) = 0ULL;
> +}
> +
> +static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
> +	uint64_t chunk, int is_last_chunk, int *intr_issued)
> +{
> +	int intr_en = 0;
> +	int ret = 0;
> +
> +	if (!ctx || !intr_issued)
> +		return -EINVAL;
> +
> +	src += chunk * ctx->dma_buf_size;
> +	dst += chunk * ctx->dma_buf_size;
> +
> +	if (((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1)) ||
> is_last_chunk) {
> +		if (*intr_issued) {
> +			ret = poll_interrupt(ctx);
> +			if (ret)
> +				return ret;
> +		}
> +		intr_en = 1;
> +	}
> +
> +	chunk %= NUM_DMA_BUF;
> +	rte_memcpy(ctx->dma_buf[chunk], (void *)(uintptr_t)src,
> +		ctx->dma_buf_size);
> +	ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[chunk]),
> +			ctx->dma_buf_size, 0, HOST_TO_FPGA, intr_en);
> +	if (intr_en)
> +		*intr_issued = 1;
> +
> +	return ret;
> +}
> +
> +static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t
> src,
> +	size_t count)
> +{
> +	uint64_t i = 0;
> +	uint64_t count_left = count;
> +	uint64_t aligned_addr = 0;
> +	uint64_t align_bytes = 0;
> +	uint64_t dma_chunks = 0;
> +	uint64_t dma_tx_bytes = 0;
> +	uint64_t offset = 0;
> +	int issued_intr = 0;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
> +		count);
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	if (!IS_DMA_ALIGNED(dst)) {
> +		if (count_left < DMA_ALIGN_BYTES)
> +			return ase_host_to_fpga(ctx, &dst, &src, count_left);
> +
> +		aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
> +			* DMA_ALIGN_BYTES;
> +		align_bytes = aligned_addr - dst;
> +		ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
> +		if (ret)
> +			return ret;
> +		count_left = count_left - align_bytes;
> +	}
> +
> +	if (count_left) {
> +		dma_chunks = count_left / ctx->dma_buf_size;
> +		offset = dma_chunks * ctx->dma_buf_size;
> +		count_left -= offset;
> +		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
> +			" (%"PRIu64"...0x%"PRIx64")",
> +			src, dst, dma_chunks, count_left);
> +		for (i = 0; i < dma_chunks; i++) {
> +			ret = dma_tx_buf(ctx, dst, src, i,
> +				i == (dma_chunks - 1), &issued_intr);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		if (issued_intr) {
> +			ret = poll_interrupt(ctx);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		if (count_left) {
> +			i = count_left / DMA_ALIGN_BYTES;
> +			if (i > 0) {
> +				dma_tx_bytes = i * DMA_ALIGN_BYTES;
> +				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64"
> to DMA",
> +					dma_tx_bytes);
> +				rte_memcpy(ctx->dma_buf[0],
> +					(void *)(uintptr_t)(src + offset),
> +					dma_tx_bytes);
> +				ret = do_dma(ctx, dst + offset,
> +					DMA_HOST_ADDR(ctx->dma_iova[0]),
> +					dma_tx_bytes, 1, HOST_TO_FPGA, 1);
> +				if (ret)
> +					return ret;
> +				ret = poll_interrupt(ctx);
> +				if (ret)
> +					return ret;
> +			}
> +
> +			count_left -= dma_tx_bytes;
> +			if (count_left) {
> +				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64"
> to ASE",
> +					count_left);
> +				dst += offset + dma_tx_bytes;
> +				src += offset + dma_tx_bytes;
> +				ret = ase_host_to_fpga(ctx, &dst, &src,
> +					count_left);
> +			}
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
> +	uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
> +{
> +	uint64_t i = chunk % NUM_DMA_BUF;
> +	uint64_t n = *rx_count;
> +	uint64_t num_pending = 0;
> +	int ret = 0;
> +
> +	if (!ctx || !wf_issued)
> +		return -EINVAL;
> +
> +	ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
> +		src + chunk * ctx->dma_buf_size,
> +		ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
> +	if (ret)
> +		return ret;
> +
> +	num_pending = chunk - n + 1;
> +	if (num_pending == HALF_DMA_BUF) {
> +		ret = issue_magic(ctx);
> +		if (ret) {
> +			AFU_MF_PMD_DEBUG("Magic issue failed");
> +			return ret;
> +		}
> +		*wf_issued = 1;
> +	}
> +
> +	if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
> +		if (*wf_issued) {
> +			wait_magic(ctx);
> +			for (i = 0; i < HALF_DMA_BUF; i++) {
> +				rte_memcpy((void *)(uintptr_t)(dst +
> +						n * ctx->dma_buf_size),
> +					ctx->dma_buf[n % NUM_DMA_BUF],
> +					ctx->dma_buf_size);
> +				n++;
> +			}
> +			*wf_issued = 0;
> +			*rx_count = n;
> +		}
> +		ret = issue_magic(ctx);
> +		if (ret) {
> +			AFU_MF_PMD_DEBUG("Magic issue failed");
> +			return ret;
> +		}
> +		*wf_issued = 1;
> +	}
> +
> +	return ret;
> +}
> +
> +static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t
> src,
> +	size_t count)
> +{
> +	uint64_t i = 0;
> +	uint64_t count_left = count;
> +	uint64_t aligned_addr = 0;
> +	uint64_t align_bytes = 0;
> +	uint64_t dma_chunks = 0;
> +	uint64_t pending_buf = 0;
> +	uint64_t dma_rx_bytes = 0;
> +	uint64_t offset = 0;
> +	int wf_issued = 0;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
> +		count);
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	if (!IS_DMA_ALIGNED(src)) {
> +		if (count_left < DMA_ALIGN_BYTES)
> +			return ase_fpga_to_host(ctx, &src, &dst, count_left);
> +
> +		aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
> +			 * DMA_ALIGN_BYTES;
> +		align_bytes = aligned_addr - src;
> +		ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
> +		if (ret)
> +			return ret;
> +		count_left = count_left - align_bytes;
> +	}
> +
> +	if (count_left) {
> +		dma_chunks = count_left / ctx->dma_buf_size;
> +		offset = dma_chunks * ctx->dma_buf_size;
> +		count_left -= offset;
> +		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
> +			" (%"PRIu64"...0x%"PRIx64")",
> +			src, dst, dma_chunks, count_left);
> +		for (i = 0; i < dma_chunks; i++) {
> +			ret = dma_rx_buf(ctx, dst, src, i,
> +				i == (dma_chunks - 1),
> +				&pending_buf, &wf_issued);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		if (wf_issued)
> +			wait_magic(ctx);
> +
> +		/* clear out final dma memcpy operations */
> +		while (pending_buf < dma_chunks) {
> +			/* constant size transfer; no length check required */
> +			rte_memcpy((void *)(uintptr_t)(dst +
> +					pending_buf * ctx->dma_buf_size),
> +				ctx->dma_buf[pending_buf %
> NUM_DMA_BUF],
> +				ctx->dma_buf_size);
> +			pending_buf++;
> +		}
> +
> +		if (count_left > 0) {
> +			i = count_left / DMA_ALIGN_BYTES;
> +			if (i > 0) {
> +				dma_rx_bytes = i * DMA_ALIGN_BYTES;
> +				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64"
> to DMA",
> +					dma_rx_bytes);
> +				ret = do_dma(ctx,
> +					DMA_HOST_ADDR(ctx->dma_iova[0]),
> +					src + offset,
> +					dma_rx_bytes, 1, FPGA_TO_HOST, 0);
> +				if (ret)
> +					return ret;
> +				ret = issue_magic(ctx);
> +				if (ret)
> +					return ret;
> +				wait_magic(ctx);
> +				rte_memcpy((void *)(uintptr_t)(dst + offset),
> +					ctx->dma_buf[0], dma_rx_bytes);
> +			}
> +
> +			count_left -= dma_rx_bytes;
> +			if (count_left) {
> +				AFU_MF_PMD_DEBUG("left over 0x%"PRIx64"
> to ASE",
> +					count_left);
> +				dst += offset + dma_rx_bytes;
> +				src += offset + dma_rx_bytes;
> +				ret = ase_fpga_to_host(ctx, &src, &dst,
> +							count_left);
> +			}
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t
> src,
> +	size_t count)
> +{
> +	uint64_t i = 0;
> +	uint64_t count_left = count;
> +	uint64_t dma_chunks = 0;
> +	uint64_t offset = 0;
> +	uint32_t tx_chunks = 0;
> +	uint64_t *tmp_buf = NULL;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
> +		count);
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
> +	    && IS_DMA_ALIGNED(count_left)) {
> +		dma_chunks = count_left / ctx->dma_buf_size;
> +		offset = dma_chunks * ctx->dma_buf_size;
> +		count_left -= offset;
> +		AFU_MF_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
> +			" (%"PRIu64"...0x%"PRIx64")",
> +			src, dst, dma_chunks, count_left);
> +		for (i = 0; i < dma_chunks; i++) {
> +			ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
> +				src + i * ctx->dma_buf_size,
> +				ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
> +			if (ret)
> +				return ret;
> +			if ((((i + 1) % NUM_DMA_BUF) == 0) ||
> +				(i == (dma_chunks - 1))) {
> +				ret = issue_magic(ctx);
> +				if (ret)
> +					return ret;
> +				wait_magic(ctx);
> +			}
> +		}
> +
> +		if (count_left > 0) {
> +			AFU_MF_PMD_DEBUG("left over 0x%"PRIx64" to
> DMA", count_left);
> +			ret = do_dma(ctx, dst + offset, src + offset,
> +				count_left, 1, FPGA_TO_FPGA, 0);
> +			if (ret)
> +				return ret;
> +			ret = issue_magic(ctx);
> +			if (ret)
> +				return ret;
> +			wait_magic(ctx);
> +		}
> +	} else {
> +		if ((src < dst) && (src + count_left > dst)) {
> +			AFU_MF_PMD_ERR("Overlapping: 0x%"PRIx64
> +				" -> 0x%"PRIx64" (0x%"PRIx64")",
> +				src, dst, count_left);
> +			return -EINVAL;
> +		}
> +		tx_chunks = count_left / ctx->dma_buf_size;
> +		offset = tx_chunks * ctx->dma_buf_size;
> +		count_left -= offset;
> +		AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
> +			" (%u...0x%"PRIx64")",
> +			src, dst, tx_chunks, count_left);
> +		tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
> +			DMA_ALIGN_BYTES);
> +		for (i = 0; i < tx_chunks; i++) {
> +			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
> +				src + i * ctx->dma_buf_size,
> +				ctx->dma_buf_size);
> +			if (ret)
> +				goto free_buf;
> +			ret = dma_host_to_fpga(ctx,
> +				dst + i * ctx->dma_buf_size,
> +				(uint64_t)tmp_buf, ctx->dma_buf_size);
> +			if (ret)
> +				goto free_buf;
> +		}
> +
> +		if (count_left > 0) {
> +			ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
> +				src + offset, count_left);
> +			if (ret)
> +				goto free_buf;
> +			ret = dma_host_to_fpga(ctx, dst + offset,
> +				(uint64_t)tmp_buf, count_left);
> +			if (ret)
> +				goto free_buf;
> +		}
> +free_buf:
> +		rte_free(tmp_buf);
> +	}
> +
> +	return ret;
> +}
> +
> +static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
> +	uint64_t src, size_t count, fpga_dma_type type)
> +{
> +	int ret = 0;
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	if (type == HOST_TO_FPGA)
> +		ret = dma_host_to_fpga(ctx, dst, src, count);
> +	else if (type == FPGA_TO_HOST)
> +		ret = dma_fpga_to_host(ctx, dst, src, count);
> +	else if (type == FPGA_TO_FPGA)
> +		ret = dma_fpga_to_fpga(ctx, dst, src, count);
> +	else
> +		return -EINVAL;
> +
> +	return ret;
> +}
> +
> +static double getTime(struct timespec start, struct timespec end)
> +{
> +	uint64_t diff = 1000000000L * (end.tv_sec - start.tv_sec)
> +		+ end.tv_nsec - start.tv_nsec;
> +	return (double)diff / (double)1000000000L;
> +}
> +
> +#define SWEEP_ITERS 1
> +static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
> +	uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
> +{
> +	struct timespec start, end;
> +	uint64_t test_size = 0;
> +	uint64_t *dma_buf_ptr = NULL;
> +	double throughput, total_time = 0.0;
> +	int i = 0;
> +	int ret = 0;
> +
> +	if (!ctx || !ctx->data_buf || !ctx->ref_buf) {
> +		AFU_MF_PMD_ERR("Buffer for DMA test is not allocated");
> +		return -EINVAL;
> +	}
> +
> +	if (length < (buf_offset + size_decrement)) {
> +		AFU_MF_PMD_ERR("Test length does not match unaligned
> parameter");
> +		return -EINVAL;
> +	}
> +	test_size = length - (buf_offset + size_decrement);
> +	if ((ddr_offset + test_size) > ctx->mem_size) {
> +		AFU_MF_PMD_ERR("Test is out of DDR memory space");
> +		return -EINVAL;
> +	}
> +
> +	dma_buf_ptr = (uint64_t *)((uint8_t *)ctx->data_buf + buf_offset);
> +	printf("Sweep Host %p to FPGA 0x%"PRIx64
> +		" with 0x%"PRIx64" bytes ...\n",
> +		(void *)dma_buf_ptr, ddr_offset, test_size);
> +
> +	for (i = 0; i < SWEEP_ITERS; i++) {
> +		clock_gettime(CLOCK_MONOTONIC, &start);
> +		ret = dma_transfer_sync(ctx, ddr_offset,
> (uint64_t)dma_buf_ptr,
> +			test_size, HOST_TO_FPGA);
> +		clock_gettime(CLOCK_MONOTONIC, &end);
> +		if (ret) {
> +			AFU_MF_PMD_ERR("Failed");
> +			return ret;
> +		}
> +		total_time += getTime(start, end);
> +	}
> +	throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
> +	printf("Measured bandwidth = %lf MB/s\n", throughput);
> +
> +	printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes
> ...\n",
> +		ddr_offset, (void *)dma_buf_ptr, test_size);
> +
> +	total_time = 0.0;
> +	memset((char *)dma_buf_ptr, 0, test_size);
> +	for (i = 0; i < SWEEP_ITERS; i++) {
> +		clock_gettime(CLOCK_MONOTONIC, &start);
> +		ret = dma_transfer_sync(ctx, (uint64_t)dma_buf_ptr,
> ddr_offset,
> +			test_size, FPGA_TO_HOST);
> +		clock_gettime(CLOCK_MONOTONIC, &end);
> +		if (ret) {
> +			AFU_MF_PMD_ERR("Failed");
> +			return ret;
> +		}
> +		total_time += getTime(start, end);
> +	}
> +	throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
> +	printf("Measured bandwidth = %lf MB/s\n", throughput);
> +
> +	printf("Verifying buffer ...\n");
> +	return dma_afu_buf_verify(ctx, test_size);
> +}
> +
> +static int dma_afu_test(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct dma_afu_ctx *ctx = NULL;
> +	struct rte_pmd_afu_dma_cfg *cfg = NULL;
> +	msgdma_ctrl ctrl;
> +	uint64_t offset = 0;
> +	uint32_t i = 0;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv)
> +		return -ENOENT;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	cfg = &priv->dma_cfg;
> +	if (cfg->index >= NUM_N3000_DMA)
> +		return -EINVAL;
> +	ctx = &priv->dma_ctx[cfg->index];
> +
> +	ctx->pattern = (int)cfg->pattern;
> +	ctx->verbose = (int)cfg->verbose;
> +	ctx->dma_buf_size = cfg->size;
> +
> +	ret = dma_afu_buf_alloc(ctx, cfg);
> +	if (ret)
> +		goto free;
> +
> +	printf("Initialize test buffer\n");
> +	dma_afu_buf_init(ctx, cfg->length);
> +
> +	/* enable interrupt */
> +	ctrl.csr = 0;
> +	ctrl.global_intr_en_mask = 1;
> +	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
> +
> +	printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf,
> +		cfg->offset, cfg->length);
> +	ret = dma_transfer_sync(ctx, cfg->offset, (uint64_t)ctx->data_buf,
> +		cfg->length, HOST_TO_FPGA);
> +	if (ret) {
> +		AFU_MF_PMD_ERR("Failed to transfer data from host to
> FPGA");
> +		goto end;
> +	}
> +	memset(ctx->data_buf, 0, cfg->length);
> +
> +	printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
> +		ctx->data_buf, cfg->length);
> +	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, cfg->offset,
> +		cfg->length, FPGA_TO_HOST);
> +	if (ret) {
> +		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to
> host");
> +		goto end;
> +	}
> +	ret = dma_afu_buf_verify(ctx, cfg->length);
> +	if (ret)
> +		goto end;
> +
> +	if ((cfg->offset + cfg->length * 2) <= ctx->mem_size)
> +		offset = cfg->offset + cfg->length;
> +	else if (cfg->offset > cfg->length)
> +		offset = 0;
> +	else
> +		goto end;
> +
> +	printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
> +		cfg->offset, offset, cfg->length);
> +	ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length,
> +		FPGA_TO_FPGA);
> +	if (ret) {
> +		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to
> FPGA");
> +		goto end;
> +	}
> +
> +	printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
> +		ctx->data_buf, cfg->length);
> +	ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, offset,
> +		cfg->length, FPGA_TO_HOST);
> +	if (ret) {
> +		AFU_MF_PMD_ERR("Failed to transfer data from FPGA to
> host");
> +		goto end;
> +	}
> +	ret = dma_afu_buf_verify(ctx, cfg->length);
> +	if (ret)
> +		goto end;
> +
> +	printf("Sweep with aligned address and size\n");
> +	ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
> +	if (ret)
> +		goto end;
> +
> +	if (cfg->unaligned) {
> +		printf("Sweep with unaligned address and size\n");
> +		struct unaligned_set {
> +			uint64_t addr_offset;
> +			uint64_t size_dec;
> +		} param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
> +		for (i = 0; i < ARRAY_SIZE(param); i++) {
> +			ret = sweep_test(ctx, cfg->length, cfg->offset,
> +				param[i].addr_offset, param[i].size_dec);
> +			if (ret)
> +				break;
> +		}
> +	}
> +
> +end:
> +	/* disable interrupt */
> +	ctrl.global_intr_en_mask = 0;
> +	rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
> +
> +free:
> +	dma_afu_buf_free(ctx);
> +	return ret;
> +}
> +
> +static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_mf_rawdev
> *dev)
> +{
> +	struct rte_afu_device *afudev = NULL;
> +
> +	if (!dev || !dev->rawdev || !dev->rawdev->device)
> +		return NULL;
> +
> +	afudev = RTE_DEV_TO_AFU(dev->rawdev->device);
> +	if (!afudev->rawdev || !afudev->rawdev->device)
> +		return NULL;
> +
> +	return RTE_DEV_TO_PCI(afudev->rawdev->device);
> +}
> +
> +#ifdef VFIO_PRESENT
> +static int dma_afu_set_irqs(struct afu_mf_rawdev *dev, uint32_t vec_start,
> +	uint32_t count, int *efds)
> +{
> +	struct rte_pci_device *pci_dev = NULL;
> +	struct vfio_irq_set *irq_set = NULL;
> +	int vfio_dev_fd = 0;
> +	size_t sz = 0;
> +	int ret = 0;
> +
> +	if (!dev || !efds || (count == 0) || (count > MAX_MSIX_VEC))
> +		return -EINVAL;
> +
> +	pci_dev = n3000_afu_get_pci_dev(dev);
> +	if (!pci_dev)
> +		return -ENODEV;
> +	vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
> +
> +	sz = sizeof(*irq_set) + sizeof(*efds) * count;
> +	irq_set = rte_zmalloc(NULL, sz, 0);
> +	if (!irq_set)
> +		return -ENOMEM;
> +
> +	irq_set->argsz = (uint32_t)sz;
> +	irq_set->count = count;
> +	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
> +		VFIO_IRQ_SET_ACTION_TRIGGER;
> +	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
> +	irq_set->start = vec_start;
> +
> +	rte_memcpy(&irq_set->data, efds, sizeof(*efds) * count);
> +	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
> +	if (ret)
> +		AFU_MF_PMD_ERR("Error enabling MSI-X interrupts\n");
> +
> +	rte_free(irq_set);
> +	return ret;
> +}
> +#endif
> +
> +static void *n3000_afu_get_port_addr(struct afu_mf_rawdev *dev)
> +{
> +	struct rte_pci_device *pci_dev = NULL;
> +	uint8_t *addr = NULL;
> +	uint64_t val = 0;
> +	uint32_t bar = 0;
> +
> +	pci_dev = n3000_afu_get_pci_dev(dev);
> +	if (!pci_dev)
> +		return NULL;
> +
> +	addr = (uint8_t *)pci_dev->mem_resource[0].addr;
> +	val = rte_read64(addr + PORT_ATTR_REG(dev->port));
> +	if (!PORT_IMPLEMENTED(val)) {
> +		AFU_MF_PMD_INFO("FIU port %d is not implemented", dev-
> >port);
> +		return NULL;
> +	}
> +
> +	bar = PORT_BAR(val);
> +	if (bar >= PCI_MAX_RESOURCE) {
> +		AFU_MF_PMD_ERR("BAR index %u is out of limit", bar);
> +		return NULL;
> +	}
> +
> +	addr = (uint8_t *)pci_dev->mem_resource[bar].addr +
> PORT_OFFSET(val);
> +	return addr;
> +}
> +
> +static int n3000_afu_get_irq_capability(struct afu_mf_rawdev *dev,
> +	uint32_t *vec_start, uint32_t *vec_count)
> +{
> +	uint8_t *addr = NULL;
> +	uint64_t val = 0;
> +	uint64_t header = 0;
> +	uint64_t next_offset = 0;
> +
> +	addr = (uint8_t *)n3000_afu_get_port_addr(dev);
> +	if (!addr)
> +		return -ENOENT;
> +
> +	do {
> +		addr += next_offset;
> +		header = rte_read64(addr);
> +		if ((DFH_TYPE(header) == DFH_TYPE_PRIVATE) &&
> +			(DFH_FEATURE_ID(header) ==
> PORT_FEATURE_UINT_ID)) {
> +			val = rte_read64(addr + PORT_UINT_CAP_REG);
> +			if (vec_start)
> +				*vec_start = PORT_VEC_START(val);
> +			if (vec_count)
> +				*vec_count = PORT_VEC_COUNT(val);
> +			return 0;
> +		}
> +		next_offset = DFH_NEXT_OFFSET(header);
> +		if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
> +			break;
> +	} while (!DFH_EOL(header));
> +
> +	return -ENOENT;
> +}
> +
> +static int nlb_afu_ctx_release(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct nlb_afu_ctx *ctx = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	ctx = &priv->nlb_ctx;
> +
> +	rte_free(ctx->dsm_ptr);
> +	ctx->dsm_ptr = NULL;
> +	ctx->status_ptr = NULL;
> +
> +	rte_free(ctx->src_ptr);
> +	ctx->src_ptr = NULL;
> +
> +	rte_free(ctx->dest_ptr);
> +	ctx->dest_ptr = NULL;
> +
> +	return 0;
> +}
> +
> +static int nlb_afu_ctx_init(struct afu_mf_rawdev *dev, uint8_t *addr)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct nlb_afu_ctx *ctx = NULL;
> +	int ret = 0;
> +
> +	if (!dev || !addr)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	ctx = &priv->nlb_ctx;
> +	ctx->addr = addr;
> +
> +	ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE,
> TEST_MEM_ALIGN);
> +	if (!ctx->dsm_ptr) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +	ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr);
> +	if (ctx->dsm_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
> +		TEST_MEM_ALIGN);
> +	if (!ctx->src_ptr) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +	ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr);
> +	if (ctx->src_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
> +		TEST_MEM_ALIGN);
> +	if (!ctx->dest_ptr) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}

Suppose the allocation of ctx->dest_ptr fails: will nlb_afu_ctx_release() still work correctly?

> +	ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr);
> +	if (ctx->dest_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->status_ptr = (struct nlb_dsm_status *)(ctx->dsm_ptr +
> DSM_STATUS);
> +	return 0;
> +
> +release:
> +	nlb_afu_ctx_release(dev);
> +	return ret;
> +}
> +
> +static int dma_afu_ctx_release(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct dma_afu_ctx *ctx = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	ctx = &priv->dma_ctx[0];
> +
> +	rte_free(ctx->desc_buf);
> +	ctx->desc_buf = NULL;
> +
> +	rte_free(ctx->magic_buf);
> +	ctx->magic_buf = NULL;
> +
> +	close(ctx->event_fd);
> +	return 0;
> +}
> +
> +static int dma_afu_ctx_init(struct afu_mf_rawdev *dev, int index, uint8_t
> *addr)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct dma_afu_ctx *ctx = NULL;
> +	uint64_t mem_sz[] = {0x100000000, 0x100000000, 0x40000000,
> 0x1000000};
> +	static int efds[1] = {0};
> +	uint32_t vec_start = 0;
> +	int ret = 0;
> +
> +	if (!dev || (index < 0) || (index >= NUM_N3000_DMA) || !addr)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	ctx = &priv->dma_ctx[index];
> +	ctx->index = index;
> +	ctx->addr = addr;
> +	ctx->csr_addr = addr + DMA_CSR;
> +	ctx->desc_addr = addr + DMA_DESC;
> +	ctx->ase_ctrl_addr = addr + DMA_ASE_CTRL;
> +	ctx->ase_data_addr = addr + DMA_ASE_DATA;
> +	ctx->mem_size = mem_sz[ctx->index];
> +	ctx->cur_ase_page = INVALID_ASE_PAGE;
> +	if (ctx->index == 0) {
> +		ret = n3000_afu_get_irq_capability(dev, &vec_start, NULL);
> +		if (ret)
> +			return ret;
> +
> +		efds[0] = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
> +		if (efds[0] < 0) {
> +			AFU_MF_PMD_ERR("eventfd create failed");
> +			return -EBADF;
> +		}
> +#ifdef VFIO_PRESENT
> +		if (dma_afu_set_irqs(dev, vec_start, 1, efds))
> +			AFU_MF_PMD_ERR("DMA interrupt setup failed");
> +#endif
> +	}
> +	ctx->event_fd = efds[0];
> +
> +	ctx->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
> +		sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
> +	if (!ctx->desc_buf) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
> +		TEST_MEM_ALIGN);
> +	if (!ctx->magic_buf) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}

Suppose the allocation of ctx->magic_buf fails: will dma_afu_ctx_release() still work correctly?

> +	ctx->magic_iova = rte_malloc_virt2iova(ctx->magic_buf);
> +	if (ctx->magic_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	return 0;
> +
> +release:
> +	dma_afu_ctx_release(dev);
> +	return ret;
> +}
> +
> +static int n3000_afu_ctx_init(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	uint8_t *addr = NULL;
> +	uint64_t header = 0;
> +	uint64_t uuid_hi = 0;
> +	uint64_t uuid_lo = 0;
> +	uint64_t next_offset = 0;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	addr = (uint8_t *)dev->addr;
> +	do {
> +		addr += next_offset;
> +		header = rte_read64(addr);
> +		uuid_lo = rte_read64(addr + DFH_UUID_L_OFFSET);
> +		uuid_hi = rte_read64(addr + DFH_UUID_H_OFFSET);
> +
> +		if ((DFH_TYPE(header) == DFH_TYPE_AFU) &&
> +			(uuid_lo == N3000_NLB0_UUID_L) &&
> +			(uuid_hi == N3000_NLB0_UUID_H)) {
> +			AFU_MF_PMD_INFO("AFU NLB0 found @ %p", (void
> *)addr);
> +			ret = nlb_afu_ctx_init(dev, addr);
> +			if (ret)
> +				return ret;
> +		} else if ((DFH_TYPE(header) == DFH_TYPE_BBB) &&
> +			(uuid_lo == N3000_DMA_UUID_L) &&
> +			(uuid_hi == N3000_DMA_UUID_H) &&
> +			(priv->num_dma < NUM_N3000_DMA)) {
> +			AFU_MF_PMD_INFO("AFU DMA%d found @ %p",
> +				priv->num_dma, (void *)addr);
> +			ret = dma_afu_ctx_init(dev, priv->num_dma, addr);
> +			if (ret)
> +				return ret;
> +			priv->num_dma++;
> +		} else {
> +			AFU_MF_PMD_DEBUG("DFH: type %"PRIu64
> +				", uuid %016"PRIx64"%016"PRIx64,
> +				DFH_TYPE(header), uuid_hi, uuid_lo);
> +		}
> +
> +		next_offset = DFH_NEXT_OFFSET(header);
> +		if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
> +			break;
> +	} while (!DFH_EOL(header));
> +
> +	return 0;
> +}
> +
> +static int n3000_afu_init(struct afu_mf_rawdev *dev)
> +{
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv) {
> +		dev->priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0);
> +		if (!dev->priv)
> +			return -ENOMEM;
> +	}
> +
> +	return n3000_afu_ctx_init(dev);
> +}
> +
> +static int n3000_afu_config(struct afu_mf_rawdev *dev, void *config,
> +	size_t config_size)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	struct rte_pmd_afu_n3000_cfg *cfg = NULL;
> +	int i = 0;
> +	uint64_t top = 0;
> +
> +	if (!dev || !config || !config_size)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
> +		return -EINVAL;
> +
> +	cfg = (struct rte_pmd_afu_n3000_cfg *)config;
> +	if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
> +		if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
> +			return -EINVAL;
> +		if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
> +			(cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
> +			return -EINVAL;
> +		if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
> +			return -EINVAL;
> +		if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
> +			return -EINVAL;
> +		if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
> +			return -EINVAL;
> +		if ((cfg->nlb_cfg.multi_cl != 1) &&
> +			(cfg->nlb_cfg.multi_cl != 2) &&
> +			(cfg->nlb_cfg.multi_cl != 4))
> +			return -EINVAL;
> +		if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
> +			(cfg->nlb_cfg.begin > MAX_CACHE_LINES))
> +			return -EINVAL;
> +		if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
> +			(cfg->nlb_cfg.end > MAX_CACHE_LINES))
> +			return -EINVAL;
> +		rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
> +			sizeof(struct rte_pmd_afu_nlb_cfg));
> +	} else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
> +		if (cfg->dma_cfg.index >= NUM_N3000_DMA)
> +			return -EINVAL;
> +		i = cfg->dma_cfg.index;
> +		if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
> +			return -EINVAL;
> +		if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
> +			return -EINVAL;
> +		top = cfg->dma_cfg.length + cfg->dma_cfg.offset;
> +		if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
> +			return -EINVAL;
> +		if (i == 3) {  /* QDR connected to DMA3 */
> +			if (cfg->dma_cfg.length & 0x3f) {
> +				cfg->dma_cfg.length &= ~0x3f;
> +				AFU_MF_PMD_INFO("Round size to %x for
> QDR",
> +					cfg->dma_cfg.length);
> +			}
> +		}
> +		rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
> +			sizeof(struct rte_pmd_afu_dma_cfg));
> +	} else {
> +		AFU_MF_PMD_ERR("Invalid type of N3000 AFU");
> +		return -EINVAL;
> +	}
> +
> +	priv->cfg_type = cfg->type;
> +	return 0;
> +}
> +
> +static int n3000_afu_test(struct afu_mf_rawdev *dev)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv)
> +		return -ENOENT;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +
> +	if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
> +		AFU_MF_PMD_INFO("Test NLB");
> +		ret = nlb_afu_test(dev);
> +	} else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
> +		AFU_MF_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
> +		ret = dma_afu_test(dev);
> +	} else {
> +		AFU_MF_PMD_ERR("Please configure AFU before test");
> +		ret = -EINVAL;
> +	}
> +
> +	return ret;
> +}
> +
> +static int n3000_afu_close(struct afu_mf_rawdev *dev)
> +{
> +	if (!dev)
> +		return -EINVAL;
> +
> +	nlb_afu_ctx_release(dev);
> +	dma_afu_ctx_release(dev);
> +
> +	rte_free(dev->priv);
> +	dev->priv = NULL;
> +
> +	return 0;
> +}
> +
> +static int n3000_afu_dump(struct afu_mf_rawdev *dev, FILE *f)
> +{
> +	struct n3000_afu_priv *priv = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct n3000_afu_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	if (!f)
> +		f = stdout;
> +
> +	if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
> +		struct nlb_afu_ctx *ctx = &priv->nlb_ctx;
> +		fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
> +		fprintf(f, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
> +		fprintf(f, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
> +		fprintf(f, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
> +		fprintf(f, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
> +		fprintf(f, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
> +		fprintf(f, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
> +		fprintf(f, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
> +	} else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
> +		struct dma_afu_ctx *ctx = &priv->dma_ctx[priv-
> >dma_cfg.index];
> +		fprintf(f, "index:\t\t%d\n", ctx->index);
> +		fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
> +		fprintf(f, "csr_addr:\t%p\n", (void *)ctx->csr_addr);
> +		fprintf(f, "desc_addr:\t%p\n", (void *)ctx->desc_addr);
> +		fprintf(f, "ase_ctrl_addr:\t%p\n", (void *)ctx->ase_ctrl_addr);
> +		fprintf(f, "ase_data_addr:\t%p\n", (void *)ctx->ase_data_addr);
> +		fprintf(f, "desc_buf:\t%p\n", (void *)ctx->desc_buf);
> +		fprintf(f, "magic_buf:\t%p\n", (void *)ctx->magic_buf);
> +		fprintf(f, "magic_iova:\t0x%"PRIx64"\n", ctx->magic_iova);
> +	} else {
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static int n3000_afu_reset(struct afu_mf_rawdev *dev)
> +{
> +	uint8_t *addr = NULL;
> +	uint64_t val = 0;
> +
> +	addr = (uint8_t *)n3000_afu_get_port_addr(dev);
> +	if (!addr)
> +		return -ENOENT;
> +
> +	val = rte_read64(addr + PORT_CTRL_REG);
> +	val |= PORT_SOFT_RESET;
> +	rte_write64(val, addr + PORT_CTRL_REG);
> +	rte_delay_us(100);
> +	val &= ~PORT_SOFT_RESET;
> +	rte_write64(val, addr + PORT_CTRL_REG);
> +
> +	return 0;
> +}
> +
> +static struct afu_mf_ops n3000_afu_ops = {
> +	.init = n3000_afu_init,
> +	.config = n3000_afu_config,
> +	.start = NULL,
> +	.stop = NULL,
> +	.test = n3000_afu_test,
> +	.close = n3000_afu_close,
> +	.dump = n3000_afu_dump,
> +	.reset = n3000_afu_reset
> +};
> +
> +struct afu_mf_drv n3000_afu_drv = {
> +	.uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
> +	.ops = &n3000_afu_ops
> +};
> diff --git a/drivers/raw/afu_mf/n3000_afu.h b/drivers/raw/afu_mf/n3000_afu.h
> new file mode 100644
> index 0000000..4c740da
> --- /dev/null
> +++ b/drivers/raw/afu_mf/n3000_afu.h
> @@ -0,0 +1,333 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _N3000_AFU_H_
> +#define _N3000_AFU_H_
> +
> +#include "afu_mf_rawdev.h"
> +#include "rte_pmd_afu.h"
> +
> +#define N3000_AFU_UUID_L  0xc000c9660d824272
> +#define N3000_AFU_UUID_H  0x9aeffe5f84570612
> +#define N3000_NLB0_UUID_L 0xf89e433683f9040b
> +#define N3000_NLB0_UUID_H 0xd8424dc4a4a3c413
> +#define N3000_DMA_UUID_L  0xa9149a35bace01ea
> +#define N3000_DMA_UUID_H  0xef82def7f6ec40fc
> +
> +extern struct afu_mf_drv n3000_afu_drv;
> +
> +#define NUM_N3000_DMA  4
> +#define MAX_MSIX_VEC   7
> +
> +/* N3000 DFL definition */
> +#define DFH_UUID_L_OFFSET  8
> +#define DFH_UUID_H_OFFSET  16
> +#define DFH_TYPE(hdr)  (((hdr) >> 60) & 0xf)
> +#define DFH_TYPE_AFU  1
> +#define DFH_TYPE_BBB  2
> +#define DFH_TYPE_PRIVATE  3
> +#define DFH_EOL(hdr)  (((hdr) >> 40) & 0x1)
> +#define DFH_NEXT_OFFSET(hdr)  (((hdr) >> 16) & 0xffffff)
> +#define DFH_FEATURE_ID(hdr)  ((hdr) & 0xfff)
> +#define PORT_ATTR_REG(n)  (((n) << 3) + 0x38)
> +#define PORT_IMPLEMENTED(attr)  (((attr) >> 60) & 0x1)
> +#define PORT_BAR(attr)  (((attr) >> 32) & 0x7)
> +#define PORT_OFFSET(attr)  ((attr) & 0xffffff)
> +#define PORT_FEATURE_UINT_ID  0x12
> +#define PORT_UINT_CAP_REG  0x8
> +#define PORT_VEC_START(cap)  (((cap) >> 12) & 0xfff)
> +#define PORT_VEC_COUNT(cap)  ((cap) >> 12 & 0xfff)
> +#define PORT_CTRL_REG  0x38
> +#define PORT_SOFT_RESET  (0x1 << 0)
> +
> +/* NLB registers definition */
> +#define CSR_SCRATCHPAD0    0x100
> +#define CSR_SCRATCHPAD1    0x108
> +#define CSR_AFU_DSM_BASEL  0x110
> +#define CSR_AFU_DSM_BASEH  0x114
> +#define CSR_SRC_ADDR       0x120
> +#define CSR_DST_ADDR       0x128
> +#define CSR_NUM_LINES      0x130
> +#define CSR_CTL            0x138
> +#define CSR_CFG            0x140
> +#define CSR_INACT_THRESH   0x148
> +#define CSR_INTERRUPT0     0x150
> +#define CSR_SWTEST_MSG     0x158
> +#define CSR_STATUS0        0x160
> +#define CSR_STATUS1        0x168
> +#define CSR_ERROR          0x170
> +#define CSR_STRIDE         0x178
> +#define CSR_HE_INFO0       0x180
> +
> +#define DSM_SIZE           0x200000
> +#define DSM_STATUS         0x40
> +#define DSM_POLL_INTERVAL  5  /* ms */
> +#define DSM_TIMEOUT        1000  /* ms */
> +
> +#define NLB_BUF_SIZE  0x400000
> +#define TEST_MEM_ALIGN  1024
> +
> +struct nlb_csr_ctl {
> +	union {
> +		uint32_t csr;
> +		struct {
> +			uint32_t reset:1;
> +			uint32_t start:1;
> +			uint32_t force_completion:1;
> +			uint32_t reserved:29;
> +		};
> +	};
> +};
> +
> +struct nlb_csr_cfg {
> +	union {
> +		uint32_t csr;
> +		struct {
> +			uint32_t wrthru_en:1;
> +			uint32_t cont:1;
> +			uint32_t mode:3;
> +			uint32_t multicl_len:2;
> +			uint32_t rsvd1:1;
> +			uint32_t delay_en:1;
> +			uint32_t rdsel:2;
> +			uint32_t rsvd2:1;
> +			uint32_t chsel:3;
> +			uint32_t rsvd3:1;
> +			uint32_t wrpush_i:1;
> +			uint32_t wr_chsel:3;
> +			uint32_t rsvd4:3;
> +			uint32_t test_cfg:5;
> +			uint32_t interrupt_on_error:1;
> +			uint32_t interrupt_testmode:1;
> +			uint32_t wrfence_chsel:2;
> +		};
> +	};
> +};
> +
> +struct nlb_status0 {
> +	union {
> +		uint64_t csr;
> +		struct {
> +			uint32_t num_writes;
> +			uint32_t num_reads;
> +		};
> +	};
> +};
> +
> +struct nlb_status1 {
> +	union {
> +		uint64_t csr;
> +		struct {
> +			uint32_t num_pend_writes;
> +			uint32_t num_pend_reads;
> +		};
> +	};
> +};
> +
> +struct nlb_dsm_status {
> +	uint32_t test_complete;
> +	uint32_t test_error;
> +	uint64_t num_clocks;
> +	uint32_t num_reads;
> +	uint32_t num_writes;
> +	uint32_t start_overhead;
> +	uint32_t end_overhead;
> +};
> +
> +/* DMA registers definition */
> +#define DMA_CSR       0x40
> +#define DMA_DESC      0x60
> +#define DMA_ASE_CTRL  0x200
> +#define DMA_ASE_DATA  0x1000
> +
> +#define DMA_ASE_WINDOW       4096
> +#define DMA_ASE_WINDOW_MASK  ((uint64_t)(DMA_ASE_WINDOW - 1))
> +#define INVALID_ASE_PAGE     0xffffffffffffffffULL
> +
> +#define DMA_WF_MAGIC             0x5772745F53796E63ULL
> +#define DMA_WF_MAGIC_ROM         0x1000000000000
> +#define DMA_HOST_ADDR(addr)      ((addr) | 0x2000000000000)
> +#define DMA_WF_HOST_ADDR(addr)   ((addr) | 0x3000000000000)
> +
> +#define NUM_DMA_BUF   8
> +#define HALF_DMA_BUF  (NUM_DMA_BUF / 2)
> +
> +#define DMA_MASK_32_BIT 0xFFFFFFFF
> +
> +#define DMA_CSR_BUSY           0x1
> +#define DMA_DESC_BUFFER_EMPTY  0x2
> +#define DMA_DESC_BUFFER_FULL   0x4
> +
> +#define DWORD_BYTES 4
> +#define IS_ALIGNED_DWORD(addr) (((addr) % DWORD_BYTES) == 0)
> +
> +#define QWORD_BYTES 8
> +#define IS_ALIGNED_QWORD(addr) (((addr) % QWORD_BYTES) == 0)
> +
> +#define DMA_ALIGN_BYTES 64
> +#define IS_DMA_ALIGNED(addr) (((addr) % DMA_ALIGN_BYTES) == 0)
> +
> +#define CCIP_ALIGN_BYTES (DMA_ALIGN_BYTES << 2)
> +
> +#define DMA_TIMEOUT_MSEC  5000
> +
> +#define MAGIC_BUF_SIZE  64
> +#define ERR_CHECK_LIMIT  64
> +
> +#ifndef MIN
> +#define MIN(a, b) ((a) < (b) ? (a) : (b))
> +#endif
> +
> +#ifndef ARRAY_SIZE
> +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
> +#endif
> +
> +typedef enum {
> +	HOST_TO_FPGA = 0,
> +	FPGA_TO_HOST,
> +	FPGA_TO_FPGA,
> +	FPGA_MAX_TRANSFER_TYPE,
> +} fpga_dma_type;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t tx_channel:8;
> +		uint32_t generate_sop:1;
> +		uint32_t generate_eop:1;
> +		uint32_t park_reads:1;
> +		uint32_t park_writes:1;
> +		uint32_t end_on_eop:1;
> +		uint32_t reserved_1:1;
> +		uint32_t transfer_irq_en:1;
> +		uint32_t early_term_irq_en:1;
> +		uint32_t trans_error_irq_en:8;
> +		uint32_t early_done_en:1;
> +		uint32_t reserved_2:6;
> +		uint32_t go:1;
> +	};
> +} msgdma_desc_ctrl;
> +
> +typedef struct __rte_packed {
> +	uint32_t rd_address;
> +	uint32_t wr_address;
> +	uint32_t len;
> +	uint16_t seq_num;
> +	uint8_t rd_burst_count;
> +	uint8_t wr_burst_count;
> +	uint16_t rd_stride;
> +	uint16_t wr_stride;
> +	uint32_t rd_address_ext;
> +	uint32_t wr_address_ext;
> +	msgdma_desc_ctrl control;
> +} msgdma_ext_desc;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t busy:1;
> +		uint32_t desc_buf_empty:1;
> +		uint32_t desc_buf_full:1;
> +		uint32_t rsp_buf_empty:1;
> +		uint32_t rsp_buf_full:1;
> +		uint32_t stopped:1;
> +		uint32_t resetting:1;
> +		uint32_t stopped_on_error:1;
> +		uint32_t stopped_on_early_term:1;
> +		uint32_t irq:1;
> +		uint32_t reserved:22;
> +	};
> +} msgdma_status;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t stop_dispatcher:1;
> +		uint32_t reset_dispatcher:1;
> +		uint32_t stop_on_error:1;
> +		uint32_t stopped_on_early_term:1;
> +		uint32_t global_intr_en_mask:1;
> +		uint32_t stop_descriptors:1;
> +		uint32_t reserved:22;
> +	};
> +} msgdma_ctrl;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t rd_fill_level:16;
> +		uint32_t wr_fill_level:16;
> +	};
> +} msgdma_fill_level;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t rsp_fill_level:16;
> +		uint32_t reserved:16;
> +	};
> +} msgdma_rsp_level;
> +
> +typedef union {
> +	uint32_t csr;
> +	struct {
> +		uint32_t rd_seq_num:16;
> +		uint32_t wr_seq_num:16;
> +	};
> +} msgdma_seq_num;
> +
> +typedef struct __rte_packed {
> +	msgdma_status status;
> +	msgdma_ctrl ctrl;
> +	msgdma_fill_level fill_level;
> +	msgdma_rsp_level rsp;
> +	msgdma_seq_num seq_num;
> +} msgdma_csr;
> +
> +#define CSR_STATUS(csr)   (&(((msgdma_csr *)(csr))->status))
> +#define CSR_CONTROL(csr)  (&(((msgdma_csr *)(csr))->ctrl))
> +
> +struct nlb_afu_ctx {
> +	uint8_t *addr;
> +	uint8_t *dsm_ptr;
> +	uint64_t dsm_iova;
> +	uint8_t *src_ptr;
> +	uint64_t src_iova;
> +	uint8_t *dest_ptr;
> +	uint64_t dest_iova;
> +	struct nlb_dsm_status *status_ptr;
> +};
> +
> +struct dma_afu_ctx {
> +	int index;
> +	uint8_t *addr;
> +	uint8_t *csr_addr;
> +	uint8_t *desc_addr;
> +	uint8_t *ase_ctrl_addr;
> +	uint8_t *ase_data_addr;
> +	uint64_t mem_size;
> +	uint64_t cur_ase_page;
> +	int event_fd;
> +	int verbose;
> +	int pattern;
> +	void *data_buf;
> +	void *ref_buf;
> +	msgdma_ext_desc *desc_buf;
> +	uint64_t *magic_buf;
> +	uint64_t magic_iova;
> +	uint32_t dma_buf_size;
> +	uint64_t *dma_buf[NUM_DMA_BUF];
> +	uint64_t dma_iova[NUM_DMA_BUF];
> +};
> +
> +struct n3000_afu_priv {
> +	struct rte_pmd_afu_nlb_cfg nlb_cfg;
> +	struct rte_pmd_afu_dma_cfg dma_cfg;
> +	struct nlb_afu_ctx nlb_ctx;
> +	struct dma_afu_ctx dma_ctx[NUM_N3000_DMA];
> +	int num_dma;
> +	int cfg_type;
> +};
> +
> +#endif /* _N3000_AFU_H_ */
> diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h
> b/drivers/raw/afu_mf/rte_pmd_afu.h
> new file mode 100644
> index 0000000..f14a053
> --- /dev/null
> +++ b/drivers/raw/afu_mf/rte_pmd_afu.h
> @@ -0,0 +1,97 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2022 Intel Corporation
> + */
> +
> +#ifndef __RTE_PMD_AFU_H__
> +#define __RTE_PMD_AFU_H__
> +
> +/**
> + * @file rte_pmd_afu.h
> + *
> + * AFU PMD specific definitions.
> + *
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> + *
> + */
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <stdint.h>
> +
> +#define RTE_PMD_AFU_N3000_NLB   1
> +#define RTE_PMD_AFU_N3000_DMA   2
> +
> +#define NLB_MODE_LPBK      0
> +#define NLB_MODE_READ      1
> +#define NLB_MODE_WRITE     2
> +#define NLB_MODE_TRPUT     3
> +
> +#define NLB_VC_AUTO        0
> +#define NLB_VC_VL0         1
> +#define NLB_VC_VH0         2
> +#define NLB_VC_VH1         3
> +#define NLB_VC_RANDOM      4
> +
> +#define NLB_WRLINE_M       0
> +#define NLB_WRLINE_I       1
> +#define NLB_WRPUSH_I       2
> +
> +#define NLB_RDLINE_S       0
> +#define NLB_RDLINE_I       1
> +#define NLB_RDLINE_MIXED   2
> +
> +#define MIN_CACHE_LINES   1
> +#define MAX_CACHE_LINES   1024
> +
> +#define MIN_DMA_BUF_SIZE  64
> +#define MAX_DMA_BUF_SIZE  (1023 * 1024)
> +
> +/**
> + * NLB AFU configuration data structure.
> + */
> +struct rte_pmd_afu_nlb_cfg {
> +	uint32_t mode;
> +	uint32_t begin;
> +	uint32_t end;
> +	uint32_t multi_cl;
> +	uint32_t cont;
> +	uint32_t timeout;
> +	uint32_t cache_policy;
> +	uint32_t cache_hint;
> +	uint32_t read_vc;
> +	uint32_t write_vc;
> +	uint32_t wrfence_vc;
> +	uint32_t freq_mhz;
> +};
> +
> +/**
> + * DMA AFU configuration data structure.
> + */
> +struct rte_pmd_afu_dma_cfg {
> +	uint32_t index;     /* index of DMA controller */
> +	uint32_t length;    /* total length of data to DMA */
> +	uint32_t offset;    /* address offset of target memory */
> +	uint32_t size;      /* size of transfer buffer */
> +	uint32_t pattern;   /* data pattern to fill in test buffer */
> +	uint32_t unaligned; /* use unaligned address or length in sweep test */
> +	uint32_t verbose;   /* enable verbose error information in test */
> +};
> +
> +/**
> + * N3000 AFU configuration data structure.
> + */
> +struct rte_pmd_afu_n3000_cfg {
> +	int type;   /* RTE_PMD_AFU_N3000_NLB or
> RTE_PMD_AFU_N3000_DMA */
> +	union {
> +		struct rte_pmd_afu_nlb_cfg nlb_cfg;
> +		struct rte_pmd_afu_dma_cfg dma_cfg;
> +	};
> +};
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* __RTE_PMD_AFU_H__ */
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 3/5] raw/afu_mf: add HE-LBK AFU driver
  2022-05-27  5:37         ` [PATCH v5 3/5] raw/afu_mf: add HE-LBK " Wei Huang
@ 2022-06-06  1:41           ` Zhang, Tianfei
  2022-06-07  2:42             ` Huang, Wei
  0 siblings, 1 reply; 57+ messages in thread
From: Zhang, Tianfei @ 2022-06-06  1:41 UTC (permalink / raw)
  To: Huang, Wei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Huang, Wei <wei.huang@intel.com>
> Sent: Friday, May 27, 2022 1:37 PM
> To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang, Wei
> <wei.huang@intel.com>
> Subject: [PATCH v5 3/5] raw/afu_mf: add HE-LBK AFU driver
> 
> HE-LBK and HE-MEM-LBK are host exerciser modules in OFS FPGA, HE-LBK is
> used to test PCI bus and HE-MEM-LBK is used to test local memory.
> This driver initialize the modules and report test result.
> 
> Signed-off-by: Wei Huang <wei.huang@intel.com>
> ---
>  drivers/raw/afu_mf/afu_mf_rawdev.c |   5 +
>  drivers/raw/afu_mf/he_lbk.c        | 427
> +++++++++++++++++++++++++++++++++++++
>  drivers/raw/afu_mf/he_lbk.h        | 121 +++++++++++
>  drivers/raw/afu_mf/meson.build     |   2 +-
>  drivers/raw/afu_mf/rte_pmd_afu.h   |  14 ++
>  5 files changed, 568 insertions(+), 1 deletion(-)  create mode 100644
> drivers/raw/afu_mf/he_lbk.c  create mode 100644 drivers/raw/afu_mf/he_lbk.h
> 
> diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c
> b/drivers/raw/afu_mf/afu_mf_rawdev.c
> index 7c18f3b..e91eb21 100644
> --- a/drivers/raw/afu_mf/afu_mf_rawdev.c
> +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
> @@ -20,16 +20,21 @@
>  #include "rte_pmd_afu.h"
>  #include "afu_mf_rawdev.h"
>  #include "n3000_afu.h"
> +#include "he_lbk.h"
> 
>  #define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
> 
>  static const struct rte_afu_uuid afu_uuid_map[] = {
>  	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
> +	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
> +	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
>  	{ 0, 0 /* sentinel */ }
>  };
> 
>  static struct afu_mf_drv *afu_table[] = {
>  	&n3000_afu_drv,
> +	&he_lbk_drv,
> +	&he_mem_lbk_drv,
>  	NULL
>  };
> 
> diff --git a/drivers/raw/afu_mf/he_lbk.c b/drivers/raw/afu_mf/he_lbk.c new file
> mode 100644 index 0000000..8735647
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_lbk.c
> @@ -0,0 +1,427 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memcpy.h>
> +#include <rte_io.h>
> +#include <rte_vfio.h>
> +#include <rte_bus_pci.h>
> +#include <rte_bus_ifpga.h>
> +#include <rte_rawdev.h>
> +
> +#include "afu_mf_rawdev.h"
> +#include "he_lbk.h"
> +
> +static int he_lbk_afu_config(struct afu_mf_rawdev *dev) {
> +	struct he_lbk_priv *priv = NULL;
> +	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
> +	struct he_lbk_csr_cfg v;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	cfg = &priv->he_lbk_cfg;
> +
> +	v.csr = 0;
> +
> +	if (cfg->cont)
> +		v.cont = 1;
> +
> +	v.mode = cfg->mode;
> +	v.trput_interleave = cfg->trput_interleave;
> +	if (cfg->multi_cl == 4)
> +		v.multicl_len = 2;
> +	else
> +		v.multicl_len = cfg->multi_cl - 1;
> +
> +	AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr);
> +	rte_write32(v.csr, priv->he_lbk_ctx.addr + CSR_CFG);
> +
> +	return 0;
> +}
> +
> +static void he_lbk_report(struct afu_mf_rawdev *dev, uint32_t cl) {
> +	struct he_lbk_priv *priv = NULL;
> +	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
> +	struct he_lbk_ctx *ctx = NULL;
> +	struct he_lbk_dsm_status *stat = NULL;
> +	struct he_lbk_status0 stat0;
> +	struct he_lbk_status1 stat1;
> +	uint64_t swtest_msg = 0;
> +	uint64_t ticks = 0;
> +	uint64_t info = 0;
> +	double num, rd_bw, wr_bw;
> +
> +	if (!dev || !dev->priv)
> +		return;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	cfg = &priv->he_lbk_cfg;
> +	ctx = &priv->he_lbk_ctx;
> +
> +	stat = ctx->status_ptr;
> +
> +	swtest_msg = rte_read64(ctx->addr + CSR_SWTEST_MSG);
> +	stat0.csr = rte_read64(ctx->addr + CSR_STATUS0);
> +	stat1.csr = rte_read64(ctx->addr + CSR_STATUS1);
> +
> +	if (cfg->cont)
> +		ticks = stat->num_clocks - stat->start_overhead;
> +	else
> +		ticks = stat->num_clocks -
> +			(stat->start_overhead + stat->end_overhead);
> +
> +	if (cfg->freq_mhz == 0) {
> +		info = rte_read64(ctx->addr + CSR_HE_INFO0);
> +		AFU_MF_PMD_INFO("API version: %"PRIx64, info >> 16);
> +		cfg->freq_mhz = info & 0xffff;
> +		if (cfg->freq_mhz == 0) {
> +			AFU_MF_PMD_INFO("Frequency of AFU clock is
> unknown."
> +				" Assuming 350 MHz.");
> +			cfg->freq_mhz = 350;
> +		}
> +	}
> +
> +	num = (double)stat0.num_reads;
> +	rd_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
> +	num = (double)stat0.num_writes;
> +	wr_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
> +
> +	printf("Cachelines  Read_Count Write_Count Pend_Read Pend_Write "
> +		"Clocks@%uMHz   Rd_Bandwidth   Wr_Bandwidth\n",
> +		cfg->freq_mhz);
> +	printf("%10u  %10u %10u %10u %10u  %12"PRIu64
> +		"   %7.3f GB/s   %7.3f GB/s\n",
> +		cl, stat0.num_reads, stat0.num_writes,
> +		stat1.num_pend_reads, stat1.num_pend_writes,
> +		ticks, rd_bw / 1e9, wr_bw / 1e9);
> +	printf("Test Message: 0x%"PRIx64"\n", swtest_msg); }
> +
> +static int he_lbk_test(struct afu_mf_rawdev *dev) {
> +	struct he_lbk_priv *priv = NULL;
> +	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
> +	struct he_lbk_ctx *ctx = NULL;
> +	struct he_lbk_csr_ctl ctl;
> +	uint32_t *ptr = NULL;
> +	uint32_t i, j, cl, val = 0;
> +	uint64_t sval = 0;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	cfg = &priv->he_lbk_cfg;
> +	ctx = &priv->he_lbk_ctx;
> +
> +	ctl.csr = 0;
> +	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +	rte_delay_us(1000);
> +	ctl.reset = 1;
> +	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +	/* initialize DMA addresses */
> +	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
> +	rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
> +
> +	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
> +	rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
> +
> +	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
> +	rte_write32(SIZE_TO_CLS(ctx->dsm_iova), ctx->addr +
> CSR_AFU_DSM_BASEL);
> +	rte_write32(SIZE_TO_CLS(ctx->dsm_iova) >> 32,
> +		ctx->addr + CSR_AFU_DSM_BASEH);
> +
> +	ret = he_lbk_afu_config(dev);
> +	if (ret)
> +		return ret;
> +
> +	/* initialize src data */
> +	ptr = (uint32_t *)ctx->src_ptr;
> +	j = CLS_TO_SIZE(cfg->end) >> 2;
> +	for (i = 0; i < j; i++)
> +		*ptr++ = i;
> +
> +	/* start test */
> +	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
> +		memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
> +		memset(ctx->dsm_ptr, 0, DSM_SIZE);
> +
> +		ctl.csr = 0;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +		rte_delay_us(1000);
> +		ctl.reset = 1;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +		rte_write32(cl - 1, ctx->addr + CSR_NUM_LINES);
> +
> +		ctl.start = 1;
> +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +
> +		if (cfg->cont) {
> +			rte_delay_ms(cfg->timeout * 1000);
> +			ctl.force_completion = 1;
> +			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +			ret = dsm_poll_timeout(&ctx->status_ptr-
> >test_complete,
> +				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
> +				DSM_TIMEOUT);
> +			if (ret) {
> +				printf("DSM poll timeout\n");
> +				goto end;
> +			}
> +		} else {
> +			ret = dsm_poll_timeout(&ctx->status_ptr-
> >test_complete,
> +				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
> +				DSM_TIMEOUT);
> +			if (ret) {
> +				printf("DSM poll timeout\n");
> +				goto end;
> +			}
> +			ctl.force_completion = 1;
> +			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> +		}
> +
> +		he_lbk_report(dev, cl);
> +
> +		i = 0;
> +		while (i++ < 100) {
> +			sval = rte_read64(ctx->addr + CSR_STATUS1);
> +			if (sval == 0)
> +				break;
> +			rte_delay_us(1000);
> +		}
> +
> +		if (cfg->mode == NLB_MODE_LPBK) {
> +			ptr = (uint32_t *)ctx->dest_ptr;
> +			j = CLS_TO_SIZE(cl) >> 2;
> +			for (i = 0; i < j; i++) {
> +				if (*ptr++ != i) {
> +					AFU_MF_PMD_ERR("Data mismatch @
> %u", i);
> +					break;
> +				}
> +			}
> +		}
> +	}
> +
> +end:
> +	return 0;
> +}
> +
> +static int he_lbk_ctx_release(struct afu_mf_rawdev *dev) {
> +	struct he_lbk_priv *priv = NULL;
> +	struct he_lbk_ctx *ctx = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	ctx = &priv->he_lbk_ctx;
> +
> +	rte_free(ctx->dsm_ptr);
> +	ctx->dsm_ptr = NULL;
> +	ctx->status_ptr = NULL;
> +
> +	rte_free(ctx->src_ptr);
> +	ctx->src_ptr = NULL;
> +
> +	rte_free(ctx->dest_ptr);
> +	ctx->dest_ptr = NULL;
> +
> +	return 0;
> +}
> +
> +static int he_lbk_ctx_init(struct afu_mf_rawdev *dev) {
> +	struct he_lbk_priv *priv = NULL;
> +	struct he_lbk_ctx *ctx = NULL;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	ctx = &priv->he_lbk_ctx;
> +	ctx->addr = (uint8_t *)dev->addr;
> +
> +	ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE,
> TEST_MEM_ALIGN);
> +	if (!ctx->dsm_ptr) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +	ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr);
> +	if (ctx->dsm_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
> +		TEST_MEM_ALIGN);
> +	if (!ctx->src_ptr) {
> +		ret = -ENOMEM;
> +		goto release;

If the allocation of ctx->src_ptr fails, does he_lbk_ctx_release() still work correctly?

> +	}
> +	ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr);
> +	if (ctx->src_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
> +		TEST_MEM_ALIGN);
> +	if (!ctx->dest_ptr) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +	ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr);
> +	if (ctx->dest_iova == RTE_BAD_IOVA) {
> +		ret = -ENOMEM;
> +		goto release;
> +	}
> +
> +	ctx->status_ptr = (struct he_lbk_dsm_status *)ctx->dsm_ptr;
> +	return 0;
> +
> +release:
> +	he_lbk_ctx_release(dev);
> +	return ret;
> +}
> +
> +static int he_lbk_init(struct afu_mf_rawdev *dev) {
> +	if (!dev)
> +		return -EINVAL;
> +
> +	if (!dev->priv) {
> +		dev->priv = rte_zmalloc(NULL, sizeof(struct he_lbk_priv), 0);
> +		if (!dev->priv)
> +			return -ENOMEM;
> +	}
> +
> +	return he_lbk_ctx_init(dev);
> +}
> +
> +static int he_lbk_config(struct afu_mf_rawdev *dev, void *config,
> +	size_t config_size)
> +{
> +	struct he_lbk_priv *priv = NULL;
> +	struct rte_pmd_afu_he_lbk_cfg *cfg = NULL;
> +
> +	if (!dev || !config || !config_size)
> +		return -EINVAL;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	if (config_size != sizeof(struct rte_pmd_afu_he_lbk_cfg))
> +		return -EINVAL;
> +
> +	cfg = (struct rte_pmd_afu_he_lbk_cfg *)config;
> +	if (cfg->mode > NLB_MODE_TRPUT)
> +		return -EINVAL;
> +	if ((cfg->multi_cl != 1) && (cfg->multi_cl != 2) &&
> +		(cfg->multi_cl != 4))
> +		return -EINVAL;
> +	if ((cfg->begin < MIN_CACHE_LINES) || (cfg->begin >
> MAX_CACHE_LINES))
> +		return -EINVAL;
> +	if ((cfg->end < cfg->begin) || (cfg->end > MAX_CACHE_LINES))
> +		return -EINVAL;
> +
> +	rte_memcpy(&priv->he_lbk_cfg, cfg, sizeof(priv->he_lbk_cfg));
> +
> +	return 0;
> +}
> +
> +static int he_lbk_close(struct afu_mf_rawdev *dev) {
> +	if (!dev)
> +		return -EINVAL;
> +
> +	he_lbk_ctx_release(dev);
> +
> +	rte_free(dev->priv);
> +	dev->priv = NULL;
> +
> +	return 0;
> +}
> +
> +static int he_lbk_dump(struct afu_mf_rawdev *dev, FILE *f) {
> +	struct he_lbk_priv *priv = NULL;
> +	struct he_lbk_ctx *ctx = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_lbk_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	if (!f)
> +		f = stdout;
> +
> +	ctx = &priv->he_lbk_ctx;
> +
> +	fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
> +	fprintf(f, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
> +	fprintf(f, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
> +	fprintf(f, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
> +	fprintf(f, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
> +	fprintf(f, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
> +	fprintf(f, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
> +	fprintf(f, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
> +
> +	return 0;
> +}
> +
> +static struct afu_mf_ops he_lbk_ops = {
> +	.init = he_lbk_init,
> +	.config = he_lbk_config,
> +	.start = NULL,
> +	.stop = NULL,
> +	.test = he_lbk_test,
> +	.close = he_lbk_close,
> +	.dump = he_lbk_dump,
> +	.reset = NULL
> +};
> +
> +struct afu_mf_drv he_lbk_drv = {
> +	.uuid = { HE_LBK_UUID_L, HE_LBK_UUID_H },
> +	.ops = &he_lbk_ops
> +};
> +
> +struct afu_mf_drv he_mem_lbk_drv = {
> +	.uuid = { HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
> +	.ops = &he_lbk_ops
> +};
> diff --git a/drivers/raw/afu_mf/he_lbk.h b/drivers/raw/afu_mf/he_lbk.h new file
> mode 100644 index 0000000..c2e8a29
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_lbk.h
> @@ -0,0 +1,121 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _HE_LBK_H_
> +#define _HE_LBK_H_
> +
> +#include "afu_mf_rawdev.h"
> +#include "rte_pmd_afu.h"
> +
> +#define HE_LBK_UUID_L      0xb94b12284c31e02b
> +#define HE_LBK_UUID_H      0x56e203e9864f49a7
> +#define HE_MEM_LBK_UUID_L  0xbb652a578330a8eb #define
> HE_MEM_LBK_UUID_H
> +0x8568ab4e6ba54616
> +
> +extern struct afu_mf_drv he_lbk_drv;
> +extern struct afu_mf_drv he_mem_lbk_drv;
> +
> +/* HE-LBK & HE-MEM-LBK registers definition */
> +#define CSR_SCRATCHPAD0    0x100
> +#define CSR_SCRATCHPAD1    0x108
> +#define CSR_AFU_DSM_BASEL  0x110
> +#define CSR_AFU_DSM_BASEH  0x114
> +#define CSR_SRC_ADDR       0x120
> +#define CSR_DST_ADDR       0x128
> +#define CSR_NUM_LINES      0x130
> +#define CSR_CTL            0x138
> +#define CSR_CFG            0x140
> +#define CSR_INACT_THRESH   0x148
> +#define CSR_INTERRUPT0     0x150
> +#define CSR_SWTEST_MSG     0x158
> +#define CSR_STATUS0        0x160
> +#define CSR_STATUS1        0x168
> +#define CSR_ERROR          0x170
> +#define CSR_STRIDE         0x178
> +#define CSR_HE_INFO0       0x180
> +
> +#define DSM_SIZE           0x200000
> +#define DSM_POLL_INTERVAL  5  /* ms */
> +#define DSM_TIMEOUT        1000  /* ms */
> +
> +#define NLB_BUF_SIZE  0x400000
> +#define TEST_MEM_ALIGN  1024
> +
> +struct he_lbk_csr_ctl {
> +	union {
> +		uint32_t csr;
> +		struct {
> +			uint32_t reset:1;
> +			uint32_t start:1;
> +			uint32_t force_completion:1;
> +			uint32_t reserved:29;
> +		};
> +	};
> +};
> +
> +struct he_lbk_csr_cfg {
> +	union {
> +		uint32_t csr;
> +		struct {
> +			uint32_t rsvd1:1;
> +			uint32_t cont:1;
> +			uint32_t mode:3;
> +			uint32_t multicl_len:2;
> +			uint32_t rsvd2:13;
> +			uint32_t trput_interleave:3;
> +			uint32_t test_cfg:5;
> +			uint32_t interrupt_on_error:1;
> +			uint32_t interrupt_testmode:1;
> +			uint32_t rsvd3:2;
> +		};
> +	};
> +};
> +
> +struct he_lbk_status0 {
> +	union {
> +		uint64_t csr;
> +		struct {
> +			uint32_t num_writes;
> +			uint32_t num_reads;
> +		};
> +	};
> +};
> +
> +struct he_lbk_status1 {
> +	union {
> +		uint64_t csr;
> +		struct {
> +			uint32_t num_pend_writes;
> +			uint32_t num_pend_reads;
> +		};
> +	};
> +};
> +
> +struct he_lbk_dsm_status {
> +	uint32_t test_complete;
> +	uint32_t test_error;
> +	uint64_t num_clocks;
> +	uint32_t num_reads;
> +	uint32_t num_writes;
> +	uint32_t start_overhead;
> +	uint32_t end_overhead;
> +};
> +
> +struct he_lbk_ctx {
> +	uint8_t *addr;
> +	uint8_t *dsm_ptr;
> +	uint64_t dsm_iova;
> +	uint8_t *src_ptr;
> +	uint64_t src_iova;
> +	uint8_t *dest_ptr;
> +	uint64_t dest_iova;
> +	struct he_lbk_dsm_status *status_ptr;
> +};
> +
> +struct he_lbk_priv {
> +	struct rte_pmd_afu_he_lbk_cfg he_lbk_cfg;
> +	struct he_lbk_ctx he_lbk_ctx;
> +};
> +
> +#endif /* _HE_LBK_H_ */
> diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
> index 8a989e3..a983f53 100644
> --- a/drivers/raw/afu_mf/meson.build
> +++ b/drivers/raw/afu_mf/meson.build
> @@ -2,6 +2,6 @@
>  # Copyright 2022 Intel Corporation
> 
>  deps += ['rawdev', 'bus_pci', 'bus_ifpga'] -sources = files('afu_mf_rawdev.c',
> 'n3000_afu.c')
> +sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c')
> 
>  headers = files('rte_pmd_afu.h')
> diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h
> b/drivers/raw/afu_mf/rte_pmd_afu.h
> index f14a053..658df55 100644
> --- a/drivers/raw/afu_mf/rte_pmd_afu.h
> +++ b/drivers/raw/afu_mf/rte_pmd_afu.h
> @@ -90,6 +90,20 @@ struct rte_pmd_afu_n3000_cfg {
>  	};
>  };
> 
> +/**
> + * HE-LBK & HE-MEM-LBK AFU configuration data structure.
> + */
> +struct rte_pmd_afu_he_lbk_cfg {
> +	uint32_t mode;
> +	uint32_t begin;
> +	uint32_t end;
> +	uint32_t multi_cl;
> +	uint32_t cont;
> +	uint32_t timeout;
> +	uint32_t trput_interleave;
> +	uint32_t freq_mhz;
> +};
> +
>  #ifdef __cplusplus
>  }
>  #endif
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 0/5] introduce afu_mf raw device driver
  2022-05-27  5:36       ` [PATCH v5 0/5] introduce afu_mf raw " Wei Huang
                           ` (4 preceding siblings ...)
  2022-05-27  5:37         ` [PATCH v5 5/5] raw/afu_mf: add HE-HSSI " Wei Huang
@ 2022-06-06  1:47         ` Zhang, Tianfei
  2022-06-07  2:34           ` Huang, Wei
  2022-06-09  2:44         ` [PATCH v6 0/5] introduce AFU PMD driver of FPGA Wei Huang
  6 siblings, 1 reply; 57+ messages in thread
From: Zhang, Tianfei @ 2022-06-06  1:47 UTC (permalink / raw)
  To: Huang, Wei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Huang, Wei <wei.huang@intel.com>
> Sent: Friday, May 27, 2022 1:37 PM
> To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang, Wei
> <wei.huang@intel.com>
> Subject: [PATCH v5 0/5] introduce afu_mf raw device driver

The title could be changed to: introduce AFU PMD driver of FPGA
> 
> The first patch implements the framework of the AFU raw device driver.

The first patch implements the framework of the AFU PMD driver based on raw device interfaces.

> The subsequent patches implement the driver of some AFUs.

Can we add a guide on how to use or test these AFU PMD drivers to the documentation, like doc/guides/rawdevs/ifpga.rst?

> 
> Wei Huang (5):
>   drivers/raw: introduce AFU raw device driver
>   raw/afu_mf: add N3000 AFU driver
>   raw/afu_mf: add HE-LBK AFU driver
>   raw/afu_mf: add HE-MEM AFU driver
>   raw/afu_mf: add HE-HSSI AFU driver
> 
>  drivers/raw/afu_mf/afu_mf_rawdev.c |  440 ++++++++
>  drivers/raw/afu_mf/afu_mf_rawdev.h |   89 ++
>  drivers/raw/afu_mf/he_hssi.c       |  369 +++++++
>  drivers/raw/afu_mf/he_hssi.h       |  102 ++
>  drivers/raw/afu_mf/he_lbk.c        |  427 ++++++++
>  drivers/raw/afu_mf/he_lbk.h        |  121 +++
>  drivers/raw/afu_mf/he_mem.c        |  181 ++++
>  drivers/raw/afu_mf/he_mem.h        |   40 +
>  drivers/raw/afu_mf/meson.build     |    8 +
>  drivers/raw/afu_mf/n3000_afu.c     | 2005
> ++++++++++++++++++++++++++++++++++++
>  drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
>  drivers/raw/afu_mf/rte_pmd_afu.h   |  134 +++
>  drivers/raw/afu_mf/version.map     |    3 +
>  drivers/raw/meson.build            |    1 +
>  14 files changed, 4253 insertions(+)
>  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
>  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
>  create mode 100644 drivers/raw/afu_mf/he_hssi.c  create mode 100644
> drivers/raw/afu_mf/he_hssi.h  create mode 100644
> drivers/raw/afu_mf/he_lbk.c  create mode 100644 drivers/raw/afu_mf/he_lbk.h
> create mode 100644 drivers/raw/afu_mf/he_mem.c  create mode 100644
> drivers/raw/afu_mf/he_mem.h  create mode 100644
> drivers/raw/afu_mf/meson.build  create mode 100644
> drivers/raw/afu_mf/n3000_afu.c  create mode 100644
> drivers/raw/afu_mf/n3000_afu.h  create mode 100644
> drivers/raw/afu_mf/rte_pmd_afu.h  create mode 100644
> drivers/raw/afu_mf/version.map
> 
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
  2022-05-27  5:37         ` [PATCH v5 1/5] drivers/raw: introduce AFU " Wei Huang
@ 2022-06-06  1:52           ` Zhang, Tianfei
  2022-06-06  2:00             ` Zhang, Tianfei
  2022-06-07  2:35             ` Huang, Wei
  2022-06-06 15:38           ` Stephen Hemminger
  1 sibling, 2 replies; 57+ messages in thread
From: Zhang, Tianfei @ 2022-06-06  1:52 UTC (permalink / raw)
  To: Huang, Wei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Huang, Wei <wei.huang@intel.com>
> Sent: Friday, May 27, 2022 1:37 PM
> To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang, Wei
> <wei.huang@intel.com>
> Subject: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
> 
> Add multi-function AFU raw device driver to manage various AFU (Acceleration
> Function Unit) in FPGA.
> This driver implements common AFU raw device interfaces and exposes them to
> application as standard raw device APIs.
> Normal application can operate specified AFU as below, 1. call
> rte_rawdev_pmd_get_named_dev() to find AFU raw device.
> 2. call rte_rawdev_configure() to initialize AFU raw device.
> 3. call rte_rawdev_selftest() to test function of AFU.
> 
> Signed-off-by: Wei Huang <wei.huang@intel.com>
> ---
> v2: fix typo
> ---
> v3: fix build error in FreeBSD13-64, UB2004-32 and UB2204-32
> ---
> v4: fix coding style issue and build error in FreeBSD13-64
> ---
> v5: split patch into several patches
> ---
>  drivers/raw/afu_mf/afu_mf_rawdev.c | 425
> +++++++++++++++++++++++++++++++++++++
>  drivers/raw/afu_mf/afu_mf_rawdev.h |  71 +++++++
>  drivers/raw/afu_mf/meson.build     |   5 +
>  drivers/raw/afu_mf/version.map     |   3 +
>  drivers/raw/meson.build            |   1 +
>  5 files changed, 505 insertions(+)
>  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
>  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
>  create mode 100644 drivers/raw/afu_mf/meson.build  create mode 100644
> drivers/raw/afu_mf/version.map

Would it be better to just put these afu* .c/.h files into the drivers/raw/ifpga/ folder, since this AFU PMD driver works on ifpga?
And could the file name "afu_mf_rawdev.c" be changed to "afu_pmd_driver.c"?

> 
> diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c
> b/drivers/raw/afu_mf/afu_mf_rawdev.c
> new file mode 100644
> index 0000000..5be372a
> --- /dev/null
> +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
> @@ -0,0 +1,425 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2022 Intel Corporation
> + */
> +
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <sys/eventfd.h>
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memzone.h>
> +#include <rte_rawdev_pmd.h>
> +
> +#include "afu_mf_rawdev.h"
> +
> +#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
> +
> +static const struct rte_afu_uuid afu_uuid_map[] = {
> +	{ 0, 0 /* sentinel */ }
> +};
> +
> +static struct afu_mf_drv *afu_table[] = {
> +	NULL
> +};
> +
> +static inline int afu_mf_trylock(struct afu_mf_rawdev *dev) {
> +	int32_t x = 0;
> +
> +	if (!dev || !dev->shared)
> +		return -ENODEV;
> +
> +	x = __atomic_load_n(&dev->shared->lock, __ATOMIC_RELAXED);
> +
> +	if ((x != 0) || (__atomic_compare_exchange_n(&dev->shared->lock, &x,
> 1,
> +				1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)
> == 0))
> +		return -EBUSY;
> +
> +	return 0;
> +}
> +
> +static inline void afu_mf_unlock(struct afu_mf_rawdev *dev) {
> +	if (!dev || !dev->shared)
> +		return;
> +
> +	__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE); }
> +
> +static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
> +	rte_rawdev_obj_t config, size_t config_size) {
> +	struct afu_mf_rawdev *dev = NULL;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (!dev)
> +		return -ENODEV;
> +
> +	if (dev->ops && dev->ops->config)
> +		ret = (*dev->ops->config)(dev, config, config_size);
> +
> +	return ret;
> +}
> +
> +static int afu_mf_rawdev_start(struct rte_rawdev *rawdev) {
> +	struct afu_mf_rawdev *dev = NULL;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (!dev)
> +		return -ENODEV;
> +
> +	ret = afu_mf_trylock(dev);
> +	if (ret) {
> +		AFU_MF_PMD_WARN("AFU is busy, please start it later");
> +		return ret;
> +	}
> +
> +	if (dev->ops && dev->ops->start)
> +		ret = (*dev->ops->start)(dev);
> +
> +	afu_mf_unlock(dev);
> +
> +	return ret;
> +}
> +
> +static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev) {
> +	struct afu_mf_rawdev *dev = NULL;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (!dev)
> +		return;
> +
> +	ret = afu_mf_trylock(dev);
> +	if (ret) {
> +		AFU_MF_PMD_WARN("AFU is busy, please stop it later");
> +		return;
> +	}
> +
> +	if (dev->ops && dev->ops->stop)
> +		ret = (*dev->ops->stop)(dev);
> +
> +	afu_mf_unlock(dev);
> +}
> +
> +static int afu_mf_rawdev_close(struct rte_rawdev *rawdev) {
> +	struct afu_mf_rawdev *dev = NULL;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (!dev)
> +		return -ENODEV;
> +
> +	if (dev->ops && dev->ops->close)
> +		ret = (*dev->ops->close)(dev);
> +
> +	return ret;
> +}
> +
> +static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev) {
> +	struct afu_mf_rawdev *dev = NULL;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (!dev)
> +		return -ENODEV;
> +
> +	ret = afu_mf_trylock(dev);
> +	if (ret) {
> +		AFU_MF_PMD_WARN("AFU is busy, please reset it later");
> +		return ret;
> +	}
> +
> +	if (dev->ops && dev->ops->reset)
> +		ret = (*dev->ops->reset)(dev);
> +
> +	afu_mf_unlock(dev);
> +
> +	return ret;
> +}
> +
> +static int afu_mf_rawdev_selftest(uint16_t dev_id) {
> +	struct afu_mf_rawdev *dev = NULL;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	if (!rte_rawdev_pmd_is_valid_dev(dev_id))
> +		return -ENODEV;
> +
> +	dev = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
> +	if (!dev)
> +		return -ENOENT;
> +
> +	ret = afu_mf_trylock(dev);
> +	if (ret) {
> +		AFU_MF_PMD_WARN("AFU is busy, please test it later");
> +		return ret;
> +	}
> +
> +	if (dev->ops && dev->ops->test)
> +		ret = (*dev->ops->test)(dev);
> +
> +	afu_mf_unlock(dev);
> +
> +	return ret;
> +}
> +
> +static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f) {
> +	struct afu_mf_rawdev *dev = NULL;
> +	int ret = 0;
> +
> +	AFU_MF_PMD_FUNC_TRACE();
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (!dev)
> +		return -ENODEV;
> +
> +	if (dev->ops && dev->ops->dump)
> +		ret = (*dev->ops->dump)(dev, f);
> +
> +	return ret;
> +}
> +
> +static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
> +	.dev_info_get = NULL,
> +	.dev_configure = afu_mf_rawdev_configure,
> +	.dev_start = afu_mf_rawdev_start,
> +	.dev_stop = afu_mf_rawdev_stop,
> +	.dev_close = afu_mf_rawdev_close,
> +	.dev_reset = afu_mf_rawdev_reset,
> +
> +	.queue_def_conf = NULL,
> +	.queue_setup = NULL,
> +	.queue_release = NULL,
> +	.queue_count = NULL,
> +
> +	.attr_get = NULL,
> +	.attr_set = NULL,
> +
> +	.enqueue_bufs = NULL,
> +	.dequeue_bufs = NULL,
> +
> +	.dump = afu_mf_rawdev_dump,
> +
> +	.xstats_get = NULL,
> +	.xstats_get_names = NULL,
> +	.xstats_get_by_name = NULL,
> +	.xstats_reset = NULL,
> +
> +	.firmware_status_get = NULL,
> +	.firmware_version_get = NULL,
> +	.firmware_load = NULL,
> +	.firmware_unload = NULL,
> +
> +	.dev_selftest = afu_mf_rawdev_selftest, };
> +
> +static int
> +afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
> +	int socket_id)
> +{
> +	const struct rte_memzone *mz;
> +	char mz_name[RTE_MEMZONE_NAMESIZE];
> +	struct afu_mf_shared *ptr = NULL;
> +	int init_mz = 0;
> +
> +	if (!name || !data)
> +		return -EINVAL;
> +
> +	/* name format is afu_?|??:??.? which is unique */
> +	snprintf(mz_name, sizeof(mz_name), "%s", name);
> +
> +	mz = rte_memzone_lookup(mz_name);
> +	if (!mz) {
> +		mz = rte_memzone_reserve(mz_name,
> +				sizeof(struct afu_mf_shared),
> +				socket_id, 0);
> +		init_mz = 1;
> +	}
> +
> +	if (!mz) {
> +		AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
> +			mz_name);
> +		return -ENOMEM;
> +	}
> +
> +	ptr = (struct afu_mf_shared *)mz->addr;
> +
> +	if (init_mz)  /* initialize memory zone on the first time */
> +		ptr->lock = 0;
> +
> +	*data = ptr;
> +
> +	return 0;
> +}
> +
> +static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev, char
> *name,
> +	size_t size)
> +{
> +	int n = 0;
> +
> +	if (!afu_dev || !name || !size)
> +		return -EINVAL;
> +
> +	n = snprintf(name, size, "afu_%s", afu_dev->device.name);
> +	if (n >= (int)size) {
> +		AFU_MF_PMD_ERR("Name of AFU device is too long!");
> +		return -ENAMETOOLONG;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id) {
> +	struct afu_mf_drv *entry = NULL;
> +	int i = 0;
> +
> +	if (!afu_id)
> +		return NULL;
> +
> +	while ((entry = afu_table[i++])) {
> +		if ((entry->uuid.uuid_low == afu_id->uuid_low) &&
> +			(entry->uuid.uuid_high == afu_id->uuid_high))
> +			break;
> +	}
> +
> +	return entry ? entry->ops : NULL;
> +}
> +
> +static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int
> +socket_id) {
> +	struct rte_rawdev *rawdev = NULL;
> +	struct afu_mf_rawdev *dev = NULL;
> +	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
> +	int ret = 0;
> +
> +	if (!afu_dev)
> +		return -EINVAL;
> +
> +	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
> +	if (ret)
> +		return ret;
> +
> +	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
> +		name, socket_id);
> +
> +	/* Allocate device structure */
> +	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct
> afu_mf_rawdev),
> +				socket_id);
> +	if (!rawdev) {
> +		AFU_MF_PMD_ERR("Unable to allocate raw device");
> +		return -ENOMEM;
> +	}
> +
> +	rawdev->dev_ops = &afu_mf_rawdev_ops;
> +	rawdev->device = &afu_dev->device;
> +	rawdev->driver_name = afu_dev->driver->driver.name;
> +
> +	dev = afu_mf_rawdev_get_priv(rawdev);
> +	if (!dev)
> +		goto cleanup;
> +
> +	dev->rawdev = rawdev;
> +	dev->port = afu_dev->id.port;
> +	dev->addr = afu_dev->mem_resource[0].addr;
> +	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
> +	if (dev->ops == NULL) {
> +		AFU_MF_PMD_ERR("Unsupported AFU device");
> +		goto cleanup;
> +	}
> +
> +	if (dev->ops->init) {
> +		ret = (*dev->ops->init)(dev);
> +		if (ret) {
> +			AFU_MF_PMD_ERR("Failed to init %s", name);
> +			goto cleanup;
> +		}
> +	}
> +
> +	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
> +	if (ret)
> +		goto cleanup;
> +
> +	return ret;
> +
> +cleanup:
> +	rte_rawdev_pmd_release(rawdev);
> +	return ret;
> +}
> +
> +static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev) {
> +	struct rte_rawdev *rawdev = NULL;
> +	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
> +	int ret = 0;
> +
> +	if (!afu_dev)
> +		return -EINVAL;
> +
> +	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
> +	if (ret)
> +		return ret;
> +
> +	AFU_MF_PMD_INFO("Destroy raw device %s", name);
> +
> +	rawdev = rte_rawdev_pmd_get_named_dev(name);
> +	if (!rawdev) {
> +		AFU_MF_PMD_ERR("Raw device %s not found", name);
> +		return -EINVAL;
> +	}
> +
> +	/* rte_rawdev_close is called by pmd_release */
> +	ret = rte_rawdev_pmd_release(rawdev);
> +	if (ret)
> +		AFU_MF_PMD_DEBUG("Device cleanup failed");
> +
> +	return 0;
> +}
> +
> +static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev) {
> +	AFU_MF_PMD_FUNC_TRACE();
> +	return afu_mf_rawdev_create(afu_dev, rte_socket_id()); }
> +
> +static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev) {
> +	AFU_MF_PMD_FUNC_TRACE();
> +	return afu_mf_rawdev_destroy(afu_dev); }
> +
> +static struct rte_afu_driver afu_mf_pmd_drv = {
> +	.id_table = afu_uuid_map,
> +	.probe = afu_mf_rawdev_probe,
> +	.remove = afu_mf_rawdev_remove
> +};
> +
> +RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME, afu_mf_pmd_drv);
> +RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);
> diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h
> b/drivers/raw/afu_mf/afu_mf_rawdev.h
> new file mode 100644
> index 0000000..df6715c
> --- /dev/null
> +++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
> @@ -0,0 +1,71 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2022 Intel Corporation
> + */
> +
> +#ifndef __AFU_MF_RAWDEV_H__
> +#define __AFU_MF_RAWDEV_H__
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +
> +#include <rte_cycles.h>
> +#include <rte_bus_ifpga.h>
> +#include <rte_rawdev.h>
> +
> +extern int afu_mf_pmd_logtype;
> +
> +#define AFU_MF_PMD_LOG(level, fmt, args...) \
> +	rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n", \
> +		__func__, ##args)
> +
> +#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG, ">>")
> +
> +#define AFU_MF_PMD_DEBUG(fmt, args...) \
> +	AFU_MF_PMD_LOG(DEBUG, fmt, ## args)
> +#define AFU_MF_PMD_INFO(fmt, args...) \
> +	AFU_MF_PMD_LOG(INFO, fmt, ## args)
> +#define AFU_MF_PMD_ERR(fmt, args...) \
> +	AFU_MF_PMD_LOG(ERR, fmt, ## args)
> +#define AFU_MF_PMD_WARN(fmt, args...) \
> +	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
> +
> +struct afu_mf_rawdev;
> +
> +struct afu_mf_ops {
> +	int (*init)(struct afu_mf_rawdev *dev);
> +	int (*config)(struct afu_mf_rawdev *dev, void *config,
> +		size_t config_size);
> +	int (*start)(struct afu_mf_rawdev *dev);
> +	int (*stop)(struct afu_mf_rawdev *dev);
> +	int (*test)(struct afu_mf_rawdev *dev);
> +	int (*close)(struct afu_mf_rawdev *dev);
> +	int (*reset)(struct afu_mf_rawdev *dev);
> +	int (*dump)(struct afu_mf_rawdev *dev, FILE *f); };
> +
> +struct afu_mf_drv {
> +	struct rte_afu_uuid uuid;
> +	struct afu_mf_ops *ops;
> +};
> +
> +struct afu_mf_shared {
> +	int32_t lock;
> +};
> +
> +struct afu_mf_rawdev {
> +	struct rte_rawdev *rawdev;  /* point to parent raw device */
> +	struct afu_mf_shared *shared;  /* shared data for multi-process */
> +	struct afu_mf_ops *ops;  /* device operation functions */
> +	int port;  /* index of port the AFU attached */
> +	void *addr;  /* base address of AFU registers */
> +	void *priv;  /* private driver data */ };
> +
> +static inline struct afu_mf_rawdev *
> +afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev) {
> +	return rawdev ? (struct afu_mf_rawdev *)rawdev->dev_private : NULL; }
> +
> +#endif /* __AFU_MF_RAWDEV_H__ */
> diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
> new file mode 100644 index 0000000..80526a2
> --- /dev/null
> +++ b/drivers/raw/afu_mf/meson.build
> @@ -0,0 +1,5 @@
> +# SPDX-License-Identifier: BSD-3-Clause # Copyright 2022 Intel
> +Corporation
> +
> +deps += ['rawdev', 'bus_pci', 'bus_ifpga'] sources =
> +files('afu_mf_rawdev.c')
> diff --git a/drivers/raw/afu_mf/version.map b/drivers/raw/afu_mf/version.map
> new file mode 100644 index 0000000..c2e0723
> --- /dev/null
> +++ b/drivers/raw/afu_mf/version.map
> @@ -0,0 +1,3 @@
> +DPDK_22 {
> +	local: *;
> +};
> diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build index
> 05e7de1..c3627f7 100644
> --- a/drivers/raw/meson.build
> +++ b/drivers/raw/meson.build
> @@ -6,6 +6,7 @@ if is_windows
>  endif
> 
>  drivers = [
> +        'afu_mf',
>          'cnxk_bphy',
>          'cnxk_gpio',
>          'dpaa2_cmdif',
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
  2022-06-06  1:52           ` Zhang, Tianfei
@ 2022-06-06  2:00             ` Zhang, Tianfei
  2022-06-07  2:36               ` Huang, Wei
  2022-06-07  2:35             ` Huang, Wei
  1 sibling, 1 reply; 57+ messages in thread
From: Zhang, Tianfei @ 2022-06-06  2:00 UTC (permalink / raw)
  To: Zhang, Tianfei, Huang, Wei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Zhang, Tianfei <tianfei.zhang@intel.com>
> Sent: Monday, June 6, 2022 9:53 AM
> To: Huang, Wei <wei.huang@intel.com>; dev@dpdk.org;
> thomas@monjalon.net; nipun.gupta@nxp.com; hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Subject: RE: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
> 
> 
> 
> > -----Original Message-----
> > From: Huang, Wei <wei.huang@intel.com>
> > Sent: Friday, May 27, 2022 1:37 PM
> > To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> > hemant.agrawal@nxp.com
> > Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> > <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang,
> > Wei <wei.huang@intel.com>
> > Subject: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
> >
> > Add multi-function AFU raw device driver to manage various AFU
> > (Acceleration Function Unit) in FPGA.
> > This driver implements common AFU raw device interfaces and exposes
> > them to application as standard raw device APIs.
> > Normal application can operate specified AFU as below, 1. call
> > rte_rawdev_pmd_get_named_dev() to find AFU raw device.
> > 2. call rte_rawdev_configure() to initialize AFU raw device.
> > 3. call rte_rawdev_selftest() to test function of AFU.
> >
> > Signed-off-by: Wei Huang <wei.huang@intel.com>
> > ---
> > v2: fix typo
> > ---
> > v3: fix build error in FreeBSD13-64, UB2004-32 and UB2204-32
> > ---
> > v4: fix coding style issue and build error in FreeBSD13-64
> > ---
> > v5: split patch into several patches
> > ---
> >  drivers/raw/afu_mf/afu_mf_rawdev.c | 425
> > +++++++++++++++++++++++++++++++++++++
> >  drivers/raw/afu_mf/afu_mf_rawdev.h |  71 +++++++
> >  drivers/raw/afu_mf/meson.build     |   5 +
> >  drivers/raw/afu_mf/version.map     |   3 +
> >  drivers/raw/meson.build            |   1 +
> >  5 files changed, 505 insertions(+)
> >  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
> >  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
> >  create mode 100644 drivers/raw/afu_mf/meson.build  create mode 100644
> > drivers/raw/afu_mf/version.map
> 
> Would it be better to just put these afu* c/h files into the
> drivers/raw/ifpga/ folder? This AFU PMD driver works on ifpga.
> And could the file name "afu_mf_rawdev.c" be changed to "afu_pmd_driver.c"?

Hmm, could the file name "afu_mf_rawdev.c" be changed to "afu_pmd_core.c" instead?
This file is the common code of the AFU PMD driver, and the specific functions are in separate C files, like afu_pmd_n3000.c.

> 
> >
> > diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c
> > b/drivers/raw/afu_mf/afu_mf_rawdev.c
> > new file mode 100644
> > index 0000000..5be372a
> > --- /dev/null
> > +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
> > @@ -0,0 +1,425 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2022 Intel Corporation
> > + */
> > +
> > +#include <errno.h>
> > +#include <stdio.h>
> > +#include <stdint.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <unistd.h>
> > +#include <fcntl.h>
> > +#include <poll.h>
> > +#include <sys/eventfd.h>
> > +
> > +#include <rte_eal.h>
> > +#include <rte_malloc.h>
> > +#include <rte_memzone.h>
> > +#include <rte_rawdev_pmd.h>
> > +
> > +#include "afu_mf_rawdev.h"
> > +
> > +#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
> > +
> > +static const struct rte_afu_uuid afu_uuid_map[] = {
> > +	{ 0, 0 /* sentinel */ }
> > +};
> > +
> > +static struct afu_mf_drv *afu_table[] = {
> > +	NULL
> > +};
> > +
> > +static inline int afu_mf_trylock(struct afu_mf_rawdev *dev) {
> > +	int32_t x = 0;
> > +
> > +	if (!dev || !dev->shared)
> > +		return -ENODEV;
> > +
> > +	x = __atomic_load_n(&dev->shared->lock, __ATOMIC_RELAXED);
> > +
> > +	if ((x != 0) || (__atomic_compare_exchange_n(&dev->shared->lock, &x,
> > 1,
> > +				1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)
> > == 0))
> > +		return -EBUSY;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline void afu_mf_unlock(struct afu_mf_rawdev *dev) {
> > +	if (!dev || !dev->shared)
> > +		return;
> > +
> > +	__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE); }
> > +
> > +static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
> > +	rte_rawdev_obj_t config, size_t config_size) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	if (dev->ops && dev->ops->config)
> > +		ret = (*dev->ops->config)(dev, config, config_size);
> > +
> > +	return ret;
> > +}
> > +
> > +static int afu_mf_rawdev_start(struct rte_rawdev *rawdev) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	ret = afu_mf_trylock(dev);
> > +	if (ret) {
> > +		AFU_MF_PMD_WARN("AFU is busy, please start it later");
> > +		return ret;
> > +	}
> > +
> > +	if (dev->ops && dev->ops->start)
> > +		ret = (*dev->ops->start)(dev);
> > +
> > +	afu_mf_unlock(dev);
> > +
> > +	return ret;
> > +}
> > +
> > +static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return;
> > +
> > +	ret = afu_mf_trylock(dev);
> > +	if (ret) {
> > +		AFU_MF_PMD_WARN("AFU is busy, please stop it later");
> > +		return;
> > +	}
> > +
> > +	if (dev->ops && dev->ops->stop)
> > +		ret = (*dev->ops->stop)(dev);
> > +
> > +	afu_mf_unlock(dev);
> > +}
> > +
> > +static int afu_mf_rawdev_close(struct rte_rawdev *rawdev) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	if (dev->ops && dev->ops->close)
> > +		ret = (*dev->ops->close)(dev);
> > +
> > +	return ret;
> > +}
> > +
> > +static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	ret = afu_mf_trylock(dev);
> > +	if (ret) {
> > +		AFU_MF_PMD_WARN("AFU is busy, please reset it later");
> > +		return ret;
> > +	}
> > +
> > +	if (dev->ops && dev->ops->reset)
> > +		ret = (*dev->ops->reset)(dev);
> > +
> > +	afu_mf_unlock(dev);
> > +
> > +	return ret;
> > +}
> > +
> > +static int afu_mf_rawdev_selftest(uint16_t dev_id) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	if (!rte_rawdev_pmd_is_valid_dev(dev_id))
> > +		return -ENODEV;
> > +
> > +	dev = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
> > +	if (!dev)
> > +		return -ENOENT;
> > +
> > +	ret = afu_mf_trylock(dev);
> > +	if (ret) {
> > +		AFU_MF_PMD_WARN("AFU is busy, please test it later");
> > +		return ret;
> > +	}
> > +
> > +	if (dev->ops && dev->ops->test)
> > +		ret = (*dev->ops->test)(dev);
> > +
> > +	afu_mf_unlock(dev);
> > +
> > +	return ret;
> > +}
> > +
> > +static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	if (dev->ops && dev->ops->dump)
> > +		ret = (*dev->ops->dump)(dev, f);
> > +
> > +	return ret;
> > +}
> > +
> > +static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
> > +	.dev_info_get = NULL,
> > +	.dev_configure = afu_mf_rawdev_configure,
> > +	.dev_start = afu_mf_rawdev_start,
> > +	.dev_stop = afu_mf_rawdev_stop,
> > +	.dev_close = afu_mf_rawdev_close,
> > +	.dev_reset = afu_mf_rawdev_reset,
> > +
> > +	.queue_def_conf = NULL,
> > +	.queue_setup = NULL,
> > +	.queue_release = NULL,
> > +	.queue_count = NULL,
> > +
> > +	.attr_get = NULL,
> > +	.attr_set = NULL,
> > +
> > +	.enqueue_bufs = NULL,
> > +	.dequeue_bufs = NULL,
> > +
> > +	.dump = afu_mf_rawdev_dump,
> > +
> > +	.xstats_get = NULL,
> > +	.xstats_get_names = NULL,
> > +	.xstats_get_by_name = NULL,
> > +	.xstats_reset = NULL,
> > +
> > +	.firmware_status_get = NULL,
> > +	.firmware_version_get = NULL,
> > +	.firmware_load = NULL,
> > +	.firmware_unload = NULL,
> > +
> > +	.dev_selftest = afu_mf_rawdev_selftest, };
> > +
> > +static int
> > +afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
> > +	int socket_id)
> > +{
> > +	const struct rte_memzone *mz;
> > +	char mz_name[RTE_MEMZONE_NAMESIZE];
> > +	struct afu_mf_shared *ptr = NULL;
> > +	int init_mz = 0;
> > +
> > +	if (!name || !data)
> > +		return -EINVAL;
> > +
> > +	/* name format is afu_?|??:??.? which is unique */
> > +	snprintf(mz_name, sizeof(mz_name), "%s", name);
> > +
> > +	mz = rte_memzone_lookup(mz_name);
> > +	if (!mz) {
> > +		mz = rte_memzone_reserve(mz_name,
> > +				sizeof(struct afu_mf_shared),
> > +				socket_id, 0);
> > +		init_mz = 1;
> > +	}
> > +
> > +	if (!mz) {
> > +		AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
> > +			mz_name);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	ptr = (struct afu_mf_shared *)mz->addr;
> > +
> > +	if (init_mz)  /* initialize memory zone on the first time */
> > +		ptr->lock = 0;
> > +
> > +	*data = ptr;
> > +
> > +	return 0;
> > +}
> > +
> > +static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev,
> > +char
> > *name,
> > +	size_t size)
> > +{
> > +	int n = 0;
> > +
> > +	if (!afu_dev || !name || !size)
> > +		return -EINVAL;
> > +
> > +	n = snprintf(name, size, "afu_%s", afu_dev->device.name);
> > +	if (n >= (int)size) {
> > +		AFU_MF_PMD_ERR("Name of AFU device is too long!");
> > +		return -ENAMETOOLONG;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id) {
> > +	struct afu_mf_drv *entry = NULL;
> > +	int i = 0;
> > +
> > +	if (!afu_id)
> > +		return NULL;
> > +
> > +	while ((entry = afu_table[i++])) {
> > +		if ((entry->uuid.uuid_low == afu_id->uuid_low) &&
> > +			(entry->uuid.uuid_high == afu_id->uuid_high))
> > +			break;
> > +	}
> > +
> > +	return entry ? entry->ops : NULL;
> > +}
> > +
> > +static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int
> > +socket_id) {
> > +	struct rte_rawdev *rawdev = NULL;
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
> > +	int ret = 0;
> > +
> > +	if (!afu_dev)
> > +		return -EINVAL;
> > +
> > +	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
> > +	if (ret)
> > +		return ret;
> > +
> > +	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
> > +		name, socket_id);
> > +
> > +	/* Allocate device structure */
> > +	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct
> > afu_mf_rawdev),
> > +				socket_id);
> > +	if (!rawdev) {
> > +		AFU_MF_PMD_ERR("Unable to allocate raw device");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	rawdev->dev_ops = &afu_mf_rawdev_ops;
> > +	rawdev->device = &afu_dev->device;
> > +	rawdev->driver_name = afu_dev->driver->driver.name;
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		goto cleanup;
> > +
> > +	dev->rawdev = rawdev;
> > +	dev->port = afu_dev->id.port;
> > +	dev->addr = afu_dev->mem_resource[0].addr;
> > +	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
> > +	if (dev->ops == NULL) {
> > +		AFU_MF_PMD_ERR("Unsupported AFU device");
> > +		goto cleanup;
> > +	}
> > +
> > +	if (dev->ops->init) {
> > +		ret = (*dev->ops->init)(dev);
> > +		if (ret) {
> > +			AFU_MF_PMD_ERR("Failed to init %s", name);
> > +			goto cleanup;
> > +		}
> > +	}
> > +
> > +	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
> > +	if (ret)
> > +		goto cleanup;
> > +
> > +	return ret;
> > +
> > +cleanup:
> > +	rte_rawdev_pmd_release(rawdev);
> > +	return ret;
> > +}
> > +
> > +static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev) {
> > +	struct rte_rawdev *rawdev = NULL;
> > +	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
> > +	int ret = 0;
> > +
> > +	if (!afu_dev)
> > +		return -EINVAL;
> > +
> > +	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
> > +	if (ret)
> > +		return ret;
> > +
> > +	AFU_MF_PMD_INFO("Destroy raw device %s", name);
> > +
> > +	rawdev = rte_rawdev_pmd_get_named_dev(name);
> > +	if (!rawdev) {
> > +		AFU_MF_PMD_ERR("Raw device %s not found", name);
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* rte_rawdev_close is called by pmd_release */
> > +	ret = rte_rawdev_pmd_release(rawdev);
> > +	if (ret)
> > +		AFU_MF_PMD_DEBUG("Device cleanup failed");
> > +
> > +	return 0;
> > +}
> > +
> > +static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev) {
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +	return afu_mf_rawdev_create(afu_dev, rte_socket_id()); }
> > +
> > +static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev) {
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +	return afu_mf_rawdev_destroy(afu_dev); }
> > +
> > +static struct rte_afu_driver afu_mf_pmd_drv = {
> > +	.id_table = afu_uuid_map,
> > +	.probe = afu_mf_rawdev_probe,
> > +	.remove = afu_mf_rawdev_remove
> > +};
> > +
> > +RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME,
> afu_mf_pmd_drv);
> > +RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);
> > diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h
> > b/drivers/raw/afu_mf/afu_mf_rawdev.h
> > new file mode 100644
> > index 0000000..df6715c
> > --- /dev/null
> > +++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
> > @@ -0,0 +1,71 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2022 Intel Corporation
> > + */
> > +
> > +#ifndef __AFU_MF_RAWDEV_H__
> > +#define __AFU_MF_RAWDEV_H__
> > +
> > +#include <stdint.h>
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +
> > +#include <rte_cycles.h>
> > +#include <rte_bus_ifpga.h>
> > +#include <rte_rawdev.h>
> > +
> > +extern int afu_mf_pmd_logtype;
> > +
> > +#define AFU_MF_PMD_LOG(level, fmt, args...) \
> > +	rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n", \
> > +		__func__, ##args)
> > +
> > +#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG, ">>")
> > +
> > +#define AFU_MF_PMD_DEBUG(fmt, args...) \
> > +	AFU_MF_PMD_LOG(DEBUG, fmt, ## args)
> > +#define AFU_MF_PMD_INFO(fmt, args...) \
> > +	AFU_MF_PMD_LOG(INFO, fmt, ## args)
> > +#define AFU_MF_PMD_ERR(fmt, args...) \
> > +	AFU_MF_PMD_LOG(ERR, fmt, ## args)
> > +#define AFU_MF_PMD_WARN(fmt, args...) \
> > +	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
> > +
> > +struct afu_mf_rawdev;
> > +
> > +struct afu_mf_ops {
> > +	int (*init)(struct afu_mf_rawdev *dev);
> > +	int (*config)(struct afu_mf_rawdev *dev, void *config,
> > +		size_t config_size);
> > +	int (*start)(struct afu_mf_rawdev *dev);
> > +	int (*stop)(struct afu_mf_rawdev *dev);
> > +	int (*test)(struct afu_mf_rawdev *dev);
> > +	int (*close)(struct afu_mf_rawdev *dev);
> > +	int (*reset)(struct afu_mf_rawdev *dev);
> > +	int (*dump)(struct afu_mf_rawdev *dev, FILE *f); };
> > +
> > +struct afu_mf_drv {
> > +	struct rte_afu_uuid uuid;
> > +	struct afu_mf_ops *ops;
> > +};
> > +
> > +struct afu_mf_shared {
> > +	int32_t lock;
> > +};
> > +
> > +struct afu_mf_rawdev {
> > +	struct rte_rawdev *rawdev;  /* point to parent raw device */
> > +	struct afu_mf_shared *shared;  /* shared data for multi-process */
> > +	struct afu_mf_ops *ops;  /* device operation functions */
> > +	int port;  /* index of port the AFU attached */
> > +	void *addr;  /* base address of AFU registers */
> > +	void *priv;  /* private driver data */ };
> > +
> > +static inline struct afu_mf_rawdev *
> > +afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev) {
> > +	return rawdev ? (struct afu_mf_rawdev *)rawdev->dev_private : NULL;
> > +}
> > +
> > +#endif /* __AFU_MF_RAWDEV_H__ */
> > diff --git a/drivers/raw/afu_mf/meson.build
> > b/drivers/raw/afu_mf/meson.build new file mode 100644 index
> > 0000000..80526a2
> > --- /dev/null
> > +++ b/drivers/raw/afu_mf/meson.build
> > @@ -0,0 +1,5 @@
> > +# SPDX-License-Identifier: BSD-3-Clause # Copyright 2022 Intel
> > +Corporation
> > +
> > +deps += ['rawdev', 'bus_pci', 'bus_ifpga'] sources =
> > +files('afu_mf_rawdev.c')
> > diff --git a/drivers/raw/afu_mf/version.map
> > b/drivers/raw/afu_mf/version.map new file mode 100644 index
> > 0000000..c2e0723
> > --- /dev/null
> > +++ b/drivers/raw/afu_mf/version.map
> > @@ -0,0 +1,3 @@
> > +DPDK_22 {
> > +	local: *;
> > +};
> > diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build index
> > 05e7de1..c3627f7 100644
> > --- a/drivers/raw/meson.build
> > +++ b/drivers/raw/meson.build
> > @@ -6,6 +6,7 @@ if is_windows
> >  endif
> >
> >  drivers = [
> > +        'afu_mf',
> >          'cnxk_bphy',
> >          'cnxk_gpio',
> >          'dpaa2_cmdif',
> > --
> > 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 4/5] raw/afu_mf: add HE-MEM AFU driver
  2022-05-27  5:37         ` [PATCH v5 4/5] raw/afu_mf: add HE-MEM " Wei Huang
@ 2022-06-06  6:36           ` Zhang, Tianfei
  2022-06-07  2:43             ` Huang, Wei
  0 siblings, 1 reply; 57+ messages in thread
From: Zhang, Tianfei @ 2022-06-06  6:36 UTC (permalink / raw)
  To: Huang, Wei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Huang, Wei <wei.huang@intel.com>
> Sent: Friday, May 27, 2022 1:37 PM
> To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang, Wei
> <wei.huang@intel.com>
> Subject: [PATCH v5 4/5] raw/afu_mf: add HE-MEM AFU driver
> 
> HE-MEM is one of the host exerciser modules in OFS FPGA, which is used to test
> local memory with built-in traffic generator.
> This driver initializes the module and reports the test result.
> 
> Signed-off-by: Wei Huang <wei.huang@intel.com>
> ---
>  drivers/raw/afu_mf/afu_mf_rawdev.c |   3 +
>  drivers/raw/afu_mf/he_mem.c        | 181
> +++++++++++++++++++++++++++++++++++++
>  drivers/raw/afu_mf/he_mem.h        |  40 ++++++++
>  drivers/raw/afu_mf/meson.build     |   2 +-
>  drivers/raw/afu_mf/rte_pmd_afu.h   |   7 ++
>  5 files changed, 232 insertions(+), 1 deletion(-)  create mode 100644
> drivers/raw/afu_mf/he_mem.c  create mode 100644
> drivers/raw/afu_mf/he_mem.h
> 
> diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c
> b/drivers/raw/afu_mf/afu_mf_rawdev.c
> index e91eb21..a56f60e 100644
> --- a/drivers/raw/afu_mf/afu_mf_rawdev.c
> +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
> @@ -21,6 +21,7 @@
>  #include "afu_mf_rawdev.h"
>  #include "n3000_afu.h"
>  #include "he_lbk.h"
> +#include "he_mem.h"
> 
>  #define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
> 
> @@ -28,6 +29,7 @@
>  	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
>  	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
>  	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
> +	{ HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
>  	{ 0, 0 /* sentinel */ }
>  };
> 
> @@ -35,6 +37,7 @@
>  	&n3000_afu_drv,
>  	&he_lbk_drv,
>  	&he_mem_lbk_drv,
> +	&he_mem_tg_drv,
>  	NULL
>  };
> 
> diff --git a/drivers/raw/afu_mf/he_mem.c b/drivers/raw/afu_mf/he_mem.c
> new file mode 100644 index 0000000..ccbb3a8
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_mem.c
> @@ -0,0 +1,181 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memcpy.h>
> +#include <rte_io.h>
> +#include <rte_vfio.h>
> +#include <rte_bus_pci.h>
> +#include <rte_bus_ifpga.h>
> +#include <rte_rawdev.h>
> +
> +#include "afu_mf_rawdev.h"
> +#include "he_mem.h"
> +
> +static int he_mem_tg_test(struct afu_mf_rawdev *dev) {
> +	struct he_mem_tg_priv *priv = NULL;
> +	struct rte_pmd_afu_he_mem_tg_cfg *cfg = NULL;
> +	struct he_mem_tg_ctx *ctx = NULL;
> +	uint64_t value = 0x12345678;
> +	uint64_t cap = 0;
> +	uint64_t channel_mask = 0;
> +	int i, t = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_mem_tg_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	cfg = &priv->he_mem_tg_cfg;
> +	ctx = &priv->he_mem_tg_ctx;
> +
> +	AFU_MF_PMD_DEBUG("Channel mask: 0x%x", cfg->channel_mask);
> +
> +	rte_write64(value, ctx->addr + MEM_TG_SCRATCHPAD);
> +	cap = rte_read64(ctx->addr + MEM_TG_SCRATCHPAD);
> +	AFU_MF_PMD_DEBUG("Scratchpad value: 0x%"PRIx64, cap);
> +	if (cap != value) {
> +		AFU_MF_PMD_ERR("Test scratchpad register failed");
> +		return -EIO;
> +	}
> +
> +	cap = rte_read64(ctx->addr + MEM_TG_CTRL);
> +	AFU_MF_PMD_DEBUG("Capability: 0x%"PRIx64, cap);
> +
> +	channel_mask = cfg->channel_mask & cap;
> +	/* start traffic generators */
> +	rte_write64(channel_mask, ctx->addr + MEM_TG_CTRL);
> +
> +	/* check test status */
> +	while (t < MEM_TG_TIMEOUT_MS) {
> +		value = rte_read64(ctx->addr + MEM_TG_STAT);
> +		for (i = 0; i < NUM_MEM_TG_CHANNELS; i++) {
> +			if (channel_mask & (1 << i)) {
> +				if (TGACTIVE(value, i))
> +					continue;
> +				printf("TG channel %d test %s\n", i,
> +					TGPASS(value, i) ? "pass" :
> +					TGTIMEOUT(value, i) ? "timeout" :
> +					TGFAIL(value, i) ? "fail" : "error");
> +				channel_mask &= ~(1 << i);
> +			}
> +		}
> +		if (!channel_mask)
> +			break;
> +		rte_delay_ms(MEM_TG_POLL_INTERVAL_MS);
> +		t += MEM_TG_POLL_INTERVAL_MS;
> +	}
> +
> +	if (channel_mask) {
> +		AFU_MF_PMD_ERR("Timeout 0x%04lx", (unsigned long)value);
> +		return channel_mask;
> +	}
> +
> +	return 0;
> +}
> +
> +static int he_mem_tg_init(struct afu_mf_rawdev *dev) {
> +	struct he_mem_tg_priv *priv = NULL;
> +	struct he_mem_tg_ctx *ctx = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_mem_tg_priv *)dev->priv;
> +	if (!priv) {
> +		priv = rte_zmalloc(NULL, sizeof(struct he_mem_tg_priv), 0);
> +		if (!priv)
> +			return -ENOMEM;
> +		dev->priv = priv;
> +	}
> +
> +	ctx = &priv->he_mem_tg_ctx;
> +	ctx->addr = (uint8_t *)dev->addr;
> +
> +	return 0;
> +}
> +
> +static int he_mem_tg_config(struct afu_mf_rawdev *dev, void *config,
> +	size_t config_size)
> +{
> +	struct he_mem_tg_priv *priv = NULL;
> +
> +	if (!dev || !config || !config_size)
> +		return -EINVAL;
> +
> +	priv = (struct he_mem_tg_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	if (config_size != sizeof(struct rte_pmd_afu_he_mem_tg_cfg))
> +		return -EINVAL;
> +
> +	rte_memcpy(&priv->he_mem_tg_cfg, config, sizeof(priv-
> >he_mem_tg_cfg));
> +
> +	return 0;
> +}
> +
> +static int he_mem_tg_close(struct afu_mf_rawdev *dev) {
> +	if (!dev)
> +		return -EINVAL;
> +
> +	rte_free(dev->priv);
> +	dev->priv = NULL;
> +
> +	return 0;
> +}
> +
> +static int he_mem_tg_dump(struct afu_mf_rawdev *dev, FILE *f) {
> +	struct he_mem_tg_priv *priv = NULL;
> +	struct he_mem_tg_ctx *ctx = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_mem_tg_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	if (!f)
> +		f = stdout;
> +
> +	ctx = &priv->he_mem_tg_ctx;
> +
> +	fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
> +
> +	return 0;
> +}
> +
> +static struct afu_mf_ops he_mem_tg_ops = {
> +	.init = he_mem_tg_init,
> +	.config = he_mem_tg_config,
> +	.start = NULL,
> +	.stop = NULL,
> +	.test = he_mem_tg_test,
> +	.close = he_mem_tg_close,
> +	.dump = he_mem_tg_dump,
> +	.reset = NULL
> +};
> +
> +struct afu_mf_drv he_mem_tg_drv = {
> +	.uuid = { HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
> +	.ops = &he_mem_tg_ops
> +};
> diff --git a/drivers/raw/afu_mf/he_mem.h b/drivers/raw/afu_mf/he_mem.h
> new file mode 100644 index 0000000..82404b6
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_mem.h
> @@ -0,0 +1,40 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _HE_MEM_H_
> +#define _HE_MEM_H_
> +
> +#include "afu_mf_rawdev.h"
> +#include "rte_pmd_afu.h"
> +
> +#define HE_MEM_TG_UUID_L  0xa3dc5b831f5cecbb #define
> HE_MEM_TG_UUID_H
> +0x4dadea342c7848cb
> +
> +#define NUM_MEM_TG_CHANNELS      4
> +#define MEM_TG_TIMEOUT_MS     5000
> +#define MEM_TG_POLL_INTERVAL_MS 10
> +
> +extern struct afu_mf_drv he_mem_tg_drv;
> +
> +/* MEM-TG registers definition */
> +#define MEM_TG_SCRATCHPAD   0x28
> +#define MEM_TG_CTRL         0x30
> +#define   TGCONTROL(n)      (1 << (n))
> +#define MEM_TG_STAT         0x38
> +#define   TGSTATUS(v, n)    (((v) >> (n << 2)) & 0xf)
> +#define   TGPASS(v, n)      (((v) >> ((n << 2) + 3)) & 0x1)
> +#define   TGFAIL(v, n)      (((v) >> ((n << 2) + 2)) & 0x1)
> +#define   TGTIMEOUT(v, n)   (((v) >> ((n << 2) + 1)) & 0x1)
> +#define   TGACTIVE(v, n)    (((v) >> (n << 2)) & 0x1)
> +
> +struct he_mem_tg_ctx {
> +	uint8_t *addr;
> +};
> +
> +struct he_mem_tg_priv {
> +	struct rte_pmd_afu_he_mem_tg_cfg he_mem_tg_cfg;
> +	struct he_mem_tg_ctx he_mem_tg_ctx;
> +};
> +
> +#endif /* _HE_MEM_H_ */
> diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
> index a983f53..b53a31b 100644
> --- a/drivers/raw/afu_mf/meson.build
> +++ b/drivers/raw/afu_mf/meson.build
> @@ -2,6 +2,6 @@
>  # Copyright 2022 Intel Corporation
> 
>  deps += ['rawdev', 'bus_pci', 'bus_ifpga'] -sources = files('afu_mf_rawdev.c',
> 'n3000_afu.c', 'he_lbk.c')
> +sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c',
> +'he_mem.c')
> 
>  headers = files('rte_pmd_afu.h')
> diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h
> b/drivers/raw/afu_mf/rte_pmd_afu.h
> index 658df55..2f92f7e 100644
> --- a/drivers/raw/afu_mf/rte_pmd_afu.h
> +++ b/drivers/raw/afu_mf/rte_pmd_afu.h
> @@ -104,6 +104,13 @@ struct rte_pmd_afu_he_lbk_cfg {
>  	uint32_t freq_mhz;
>  };
> 
> +/**
> + * HE-MEM-TG AFU configuration data structure.
> + */
> +struct rte_pmd_afu_he_mem_tg_cfg {
> +	uint32_t channel_mask;   /* mask of traffic generator channel */
> +};
> +
>  #ifdef __cplusplus
>  }
>  #endif

Would it be better to change the file name to "afu_pmd_he_mem.c"?
The rest looks good to me; you can add:
Acked-by: Tianfei Zhang <tianfei.zhang@intel.com>

> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 5/5] raw/afu_mf: add HE-HSSI AFU driver
  2022-05-27  5:37         ` [PATCH v5 5/5] raw/afu_mf: add HE-HSSI " Wei Huang
@ 2022-06-06  6:39           ` Zhang, Tianfei
  2022-06-07  2:44             ` Huang, Wei
  0 siblings, 1 reply; 57+ messages in thread
From: Zhang, Tianfei @ 2022-06-06  6:39 UTC (permalink / raw)
  To: Huang, Wei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Huang, Wei <wei.huang@intel.com>
> Sent: Friday, May 27, 2022 1:37 PM
> To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang, Wei
> <wei.huang@intel.com>
> Subject: [PATCH v5 5/5] raw/afu_mf: add HE-HSSI AFU driver
> 
> HE-HSSI is one of the host exerciser modules in OFS FPGA, which is used to test
> HSSI (High Speed Serial Interface).
> This driver initialize the module and report test result.
> 
> Signed-off-by: Wei Huang <wei.huang@intel.com>
> ---
>  drivers/raw/afu_mf/afu_mf_rawdev.c |   3 +
>  drivers/raw/afu_mf/he_hssi.c       | 369

Would it be better to rename this file to "afu_pmd_he_hssi.c"?

> +++++++++++++++++++++++++++++++++++++
>  drivers/raw/afu_mf/he_hssi.h       | 102 ++++++++++
>  drivers/raw/afu_mf/meson.build     |   3 +-
>  drivers/raw/afu_mf/rte_pmd_afu.h   |  16 ++
>  5 files changed, 492 insertions(+), 1 deletion(-)  create mode 100644
> drivers/raw/afu_mf/he_hssi.c  create mode 100644
> drivers/raw/afu_mf/he_hssi.h
> 
> diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c
> b/drivers/raw/afu_mf/afu_mf_rawdev.c
> index a56f60e..f24c748 100644
> --- a/drivers/raw/afu_mf/afu_mf_rawdev.c
> +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
> @@ -22,6 +22,7 @@
>  #include "n3000_afu.h"
>  #include "he_lbk.h"
>  #include "he_mem.h"
> +#include "he_hssi.h"
> 
>  #define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
> 
> @@ -30,6 +31,7 @@
>  	{ HE_LBK_UUID_L, HE_LBK_UUID_H },
>  	{ HE_MEM_LBK_UUID_L, HE_MEM_LBK_UUID_H },
>  	{ HE_MEM_TG_UUID_L, HE_MEM_TG_UUID_H },
> +	{ HE_HSSI_UUID_L, HE_HSSI_UUID_H },
>  	{ 0, 0 /* sentinel */ }
>  };
> 
> @@ -38,6 +40,7 @@
>  	&he_lbk_drv,
>  	&he_mem_lbk_drv,
>  	&he_mem_tg_drv,
> +	&he_hssi_drv,
>  	NULL
>  };
> 
> diff --git a/drivers/raw/afu_mf/he_hssi.c b/drivers/raw/afu_mf/he_hssi.c new
> file mode 100644 index 0000000..bedafbd
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_hssi.c
> @@ -0,0 +1,369 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <sys/eventfd.h>
> +#include <sys/ioctl.h>
> +
> +#include <rte_eal.h>
> +#include <rte_malloc.h>
> +#include <rte_memcpy.h>
> +#include <rte_io.h>
> +#include <rte_vfio.h>
> +#include <rte_bus_pci.h>
> +#include <rte_bus_ifpga.h>
> +#include <rte_rawdev.h>
> +
> +#include "afu_mf_rawdev.h"
> +#include "he_hssi.h"
> +
> +static int he_hssi_indirect_write(struct he_hssi_ctx *ctx, uint32_t addr,
> +	uint32_t value)
> +{
> +	struct traffic_ctrl_cmd cmd;
> +	struct traffic_ctrl_data data;
> +	uint32_t i = 0;
> +
> +	AFU_MF_PMD_DEBUG("Indirect write 0x%x, value 0x%08x", addr,
> value);
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	data.write_data = value;
> +	rte_write64(data.csr, ctx->addr + TRAFFIC_CTRL_DATA);
> +
> +	cmd.csr = 0;
> +	cmd.write_cmd = 1;
> +	cmd.afu_cmd_addr = addr;
> +	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
> +
> +	while (i < MAILBOX_TIMEOUT_MS) {
> +		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
> +		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
> +		if (cmd.ack_trans)
> +			break;
> +		i += MAILBOX_POLL_INTERVAL_MS;
> +	}
> +	if (i >= MAILBOX_TIMEOUT_MS)
> +		return -ETIMEDOUT;
> +
> +	i = 0;
> +	cmd.csr = 0;
> +	while (i < MAILBOX_TIMEOUT_MS) {
> +		cmd.ack_trans = 1;
> +		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
> +		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
> +		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
> +		if (!cmd.ack_trans)
> +			break;
> +		i += MAILBOX_POLL_INTERVAL_MS;
> +	}
> +	if (i >= MAILBOX_TIMEOUT_MS)
> +		return -ETIMEDOUT;
> +
> +	return 0;
> +}
> +
> +static int he_hssi_indirect_read(struct he_hssi_ctx *ctx, uint32_t addr,
> +	uint32_t *value)
> +{
> +	struct traffic_ctrl_cmd cmd;
> +	struct traffic_ctrl_data data;
> +	uint32_t i = 0;
> +
> +	if (!ctx)
> +		return -EINVAL;
> +
> +	cmd.csr = 0;
> +	cmd.read_cmd = 1;
> +	cmd.afu_cmd_addr = addr;
> +	rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
> +
> +	while (i < MAILBOX_TIMEOUT_MS) {
> +		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
> +		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
> +		if (cmd.ack_trans) {
> +			data.csr = rte_read64(ctx->addr +
> TRAFFIC_CTRL_DATA);
> +			*value = data.read_data;
> +			break;
> +		}
> +		i += MAILBOX_POLL_INTERVAL_MS;
> +	}
> +	if (i >= MAILBOX_TIMEOUT_MS)
> +		return -ETIMEDOUT;
> +
> +	i = 0;
> +	cmd.csr = 0;
> +	while (i < MAILBOX_TIMEOUT_MS) {
> +		cmd.ack_trans = 1;
> +		rte_write64(cmd.csr, ctx->addr + TRAFFIC_CTRL_CMD);
> +		rte_delay_ms(MAILBOX_POLL_INTERVAL_MS);
> +		cmd.csr = rte_read64(ctx->addr + TRAFFIC_CTRL_CMD);
> +		if (!cmd.ack_trans)
> +			break;
> +		i += MAILBOX_POLL_INTERVAL_MS;
> +	}
> +	if (i >= MAILBOX_TIMEOUT_MS)
> +		return -ETIMEDOUT;
> +
> +	AFU_MF_PMD_DEBUG("Indirect read 0x%x, value 0x%08x", addr,
> *value);
> +	return 0;
> +}
> +
> +static void he_hssi_report(struct he_hssi_ctx *ctx) {
> +	uint32_t val = 0;
> +	uint64_t v64 = 0;
> +	int ret = 0;
> +
> +	ret = he_hssi_indirect_read(ctx, TM_PKT_GOOD, &val);
> +	if (ret)
> +		return;
> +	printf("Number of good packets received: %u\n", val);
> +
> +	ret = he_hssi_indirect_read(ctx, TM_PKT_BAD, &val);
> +	if (ret)
> +		return;
> +	printf("Number of bad packets received: %u\n", val);
> +
> +	ret = he_hssi_indirect_read(ctx, TM_BYTE_CNT1, &val);
> +	if (ret)
> +		return;
> +	v64 = val;
> +	ret = he_hssi_indirect_read(ctx, TM_BYTE_CNT0, &val);
> +	if (ret)
> +		return;
> +	v64 = (v64 << 32) | val;
> +	printf("Number of bytes received: %"PRIu64"\n", v64);
> +
> +	ret = he_hssi_indirect_read(ctx, TM_AVST_RX_ERR, &val);
> +	if (ret)
> +		return;
> +	if (val & ERR_VALID) {
> +		printf("AVST rx error:");
> +		if (val & OVERFLOW_ERR)
> +			printf(" overflow");
> +		if (val & LENGTH_ERR)
> +			printf(" length");
> +		if (val & OVERSIZE_ERR)
> +			printf(" oversize");
> +		if (val & UNDERSIZE_ERR)
> +			printf(" undersize");
> +		if (val & MAC_CRC_ERR)
> +			printf(" crc");
> +		if (val & PHY_ERR)
> +			printf(" phy");
> +		printf("\n");
> +	}
> +
> +	ret = he_hssi_indirect_read(ctx, LOOPBACK_FIFO_STATUS, &val);
> +	if (ret)
> +		return;
> +	if (val & (ALMOST_EMPTY | ALMOST_FULL)) {
> +		printf("FIFO status:");
> +		if (val & ALMOST_EMPTY)
> +			printf(" almost empty");
> +		if (val & ALMOST_FULL)
> +			printf(" almost full");
> +		printf("\n");
> +	}
> +}
> +
> +static int he_hssi_test(struct afu_mf_rawdev *dev) {
> +	struct he_hssi_priv *priv = NULL;
> +	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
> +	struct he_hssi_ctx *ctx = NULL;
> +	struct traffic_ctrl_ch_sel sel;
> +	uint32_t val = 0;
> +	uint32_t i = 0;
> +	int ret = 0;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_hssi_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	cfg = &priv->he_hssi_cfg;
> +	ctx = &priv->he_hssi_ctx;
> +
> +	ret = he_hssi_indirect_write(ctx, TG_STOP_XFR, 0);
> +	if (ret)
> +		return ret;
> +
> +	sel.channel_sel = cfg->port;
> +	rte_write64(sel.csr, ctx->addr + TRAFFIC_CTRL_CH_SEL);
> +
> +	if (cfg->he_loopback >= 0) {
> +		val = cfg->he_loopback ? 1 : 0;
> +		AFU_MF_PMD_INFO("%s HE loopback on port %u",
> +			val ? "Enable" : "Disable", cfg->port);
> +		return he_hssi_indirect_write(ctx, LOOPBACK_EN, val);
> +	}
> +
> +	ret = he_hssi_indirect_write(ctx, TG_NUM_PKT, cfg->num_packets);
> +	if (ret)
> +		return ret;
> +
> +	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN, cfg->packet_length);
> +	if (ret)
> +		return ret;
> +
> +	val = cfg->src_addr & 0xffffffff;
> +	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_L, val);
> +	if (ret)
> +		return ret;
> +	val = (cfg->src_addr >> 32) & 0xffff;
> +	ret = he_hssi_indirect_write(ctx, TG_SRC_MAC_H, val);
> +	if (ret)
> +		return ret;
> +
> +	val = cfg->dest_addr & 0xffffffff;
> +	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_L, val);
> +	if (ret)
> +		return ret;
> +	val = (cfg->dest_addr >> 32) & 0xffff;
> +	ret = he_hssi_indirect_write(ctx, TG_DST_MAC_H, val);
> +	if (ret)
> +		return ret;
> +
> +	val = cfg->random_length ? 1 : 0;
> +	ret = he_hssi_indirect_write(ctx, TG_PKT_LEN_TYPE, val);
> +	if (ret)
> +		return ret;
> +
> +	val = cfg->random_payload ? 1 : 0;
> +	ret = he_hssi_indirect_write(ctx, TG_DATA_PATTERN, val);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < 3; i++) {

What is the meaning of "3"?

> +		ret = he_hssi_indirect_write(ctx, TG_RANDOM_SEED(i),
> +			cfg->rnd_seed[i]);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	ret = he_hssi_indirect_write(ctx, TG_START_XFR, 1);
> +	if (ret)
> +		return ret;
> +
> +	while (i++ < cfg->timeout) {
> +		ret = he_hssi_indirect_read(ctx, TG_PKT_XFRD, &val);
> +		if (ret)
> +			break;
> +		if (val == cfg->num_packets)
> +			break;
> +		sleep(1);
> +	}
> +
> +	he_hssi_report(ctx);
> +
> +	return ret;
> +}
> +
> +static int he_hssi_init(struct afu_mf_rawdev *dev) {
> +	struct he_hssi_priv *priv = NULL;
> +	struct he_hssi_ctx *ctx = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_hssi_priv *)dev->priv;
> +	if (!priv) {
> +		priv = rte_zmalloc(NULL, sizeof(struct he_hssi_priv), 0);
> +		if (!priv)
> +			return -ENOMEM;
> +		dev->priv = priv;
> +	}
> +
> +	ctx = &priv->he_hssi_ctx;
> +	ctx->addr = (uint8_t *)dev->addr;
> +
> +	return 0;
> +}
> +
> +static int he_hssi_config(struct afu_mf_rawdev *dev, void *config,
> +	size_t config_size)
> +{
> +	struct he_hssi_priv *priv = NULL;
> +	struct rte_pmd_afu_he_hssi_cfg *cfg = NULL;
> +
> +	if (!dev || !config || !config_size)
> +		return -EINVAL;
> +
> +	priv = (struct he_hssi_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	if (config_size != sizeof(struct rte_pmd_afu_he_hssi_cfg))
> +		return -EINVAL;
> +
> +	cfg = (struct rte_pmd_afu_he_hssi_cfg *)config;
> +	if (cfg->port >= NUM_HE_HSSI_PORTS)
> +		return -EINVAL;
> +
> +	rte_memcpy(&priv->he_hssi_cfg, cfg, sizeof(priv->he_hssi_cfg));
> +
> +	return 0;
> +}
> +
> +static int he_hssi_close(struct afu_mf_rawdev *dev) {
> +	if (!dev)
> +		return -EINVAL;
> +
> +	rte_free(dev->priv);
> +	dev->priv = NULL;
> +
> +	return 0;
> +}
> +
> +static int he_hssi_dump(struct afu_mf_rawdev *dev, FILE *f) {
> +	struct he_hssi_priv *priv = NULL;
> +	struct he_hssi_ctx *ctx = NULL;
> +
> +	if (!dev)
> +		return -EINVAL;
> +
> +	priv = (struct he_hssi_priv *)dev->priv;
> +	if (!priv)
> +		return -ENOENT;
> +
> +	if (!f)
> +		f = stdout;
> +
> +	ctx = &priv->he_hssi_ctx;
> +
> +	fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
> +
> +	return 0;
> +}
> +
> +static struct afu_mf_ops he_hssi_ops = {
> +	.init = he_hssi_init,
> +	.config = he_hssi_config,
> +	.start = NULL,
> +	.stop = NULL,
> +	.test = he_hssi_test,
> +	.close = he_hssi_close,
> +	.dump = he_hssi_dump,
> +	.reset = NULL
> +};
> +
> +struct afu_mf_drv he_hssi_drv = {
> +	.uuid = { HE_HSSI_UUID_L, HE_HSSI_UUID_H },
> +	.ops = &he_hssi_ops
> +};
> diff --git a/drivers/raw/afu_mf/he_hssi.h b/drivers/raw/afu_mf/he_hssi.h new
> file mode 100644 index 0000000..f8b9623
> --- /dev/null
> +++ b/drivers/raw/afu_mf/he_hssi.h
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#ifndef _HE_HSSI_H_
> +#define _HE_HSSI_H_
> +
> +#include "afu_mf_rawdev.h"
> +#include "rte_pmd_afu.h"
> +
> +#define HE_HSSI_UUID_L    0xbb370242ac130002
> +#define HE_HSSI_UUID_H    0x823c334c98bf11ea
> +#define NUM_HE_HSSI_PORTS 8
> +
> +extern struct afu_mf_drv he_hssi_drv;
> +
> +/* HE-HSSI registers definition */
> +#define TRAFFIC_CTRL_CMD    0x30
> +#define TRAFFIC_CTRL_DATA   0x38
> +#define TRAFFIC_CTRL_CH_SEL 0x40
> +#define AFU_SCRATCHPAD      0x48
> +
> +#define TG_NUM_PKT        0x3c00
> +#define TG_PKT_LEN_TYPE   0x3c01
> +#define TG_DATA_PATTERN   0x3c02
> +#define TG_START_XFR      0x3c03
> +#define TG_STOP_XFR       0x3c04
> +#define TG_SRC_MAC_L      0x3c05
> +#define TG_SRC_MAC_H      0x3c06
> +#define TG_DST_MAC_L      0x3c07
> +#define TG_DST_MAC_H      0x3c08
> +#define TG_PKT_XFRD       0x3c09
> +#define TG_RANDOM_SEED(n) (0x3c0a + (n))
> +#define TG_PKT_LEN        0x3c0d
> +
> +#define TM_NUM_PKT        0x3d00
> +#define TM_PKT_GOOD       0x3d01
> +#define TM_PKT_BAD        0x3d02
> +#define TM_BYTE_CNT0      0x3d03
> +#define TM_BYTE_CNT1      0x3d04
> +#define TM_AVST_RX_ERR    0x3d07
> +#define   OVERFLOW_ERR    (1 << 9)
> +#define   LENGTH_ERR      (1 << 8)
> +#define   OVERSIZE_ERR    (1 << 7)
> +#define   UNDERSIZE_ERR   (1 << 6)
> +#define   MAC_CRC_ERR     (1 << 5)
> +#define   PHY_ERR         (1 << 4)
> +#define   ERR_VALID       (1 << 3)
> +
> +#define LOOPBACK_EN          0x3e00
> +#define LOOPBACK_FIFO_STATUS 0x3e01
> +#define   ALMOST_EMPTY    (1 << 1)
> +#define   ALMOST_FULL     (1 << 0)
> +
> +#define MAILBOX_TIMEOUT_MS       100
> +#define MAILBOX_POLL_INTERVAL_MS 10
> +
> +struct traffic_ctrl_cmd {
> +	union {
> +		uint64_t csr;
> +		struct {
> +			uint32_t read_cmd:1;
> +			uint32_t write_cmd:1;
> +			uint32_t ack_trans:1;
> +			uint32_t rsvd1:29;
> +			uint32_t afu_cmd_addr:16;
> +			uint32_t rsvd2:16;
> +		};
> +	};
> +};
> +
> +struct traffic_ctrl_data {
> +	union {
> +		uint64_t csr;
> +		struct {
> +			uint32_t read_data;
> +			uint32_t write_data;
> +		};
> +	};
> +};
> +
> +struct traffic_ctrl_ch_sel {
> +	union {
> +		uint64_t csr;
> +		struct {
> +			uint32_t channel_sel:3;
> +			uint32_t rsvd1:29;
> +			uint32_t rsvd2;
> +		};
> +	};
> +};
> +
> +struct he_hssi_ctx {
> +	uint8_t *addr;
> +};
> +
> +struct he_hssi_priv {
> +	struct rte_pmd_afu_he_hssi_cfg he_hssi_cfg;
> +	struct he_hssi_ctx he_hssi_ctx;
> +};
> +
> +#endif /* _HE_HSSI_H_ */
> diff --git a/drivers/raw/afu_mf/meson.build b/drivers/raw/afu_mf/meson.build
> index b53a31b..f304bc8 100644
> --- a/drivers/raw/afu_mf/meson.build
> +++ b/drivers/raw/afu_mf/meson.build
> @@ -2,6 +2,7 @@
>  # Copyright 2022 Intel Corporation
> 
>  deps += ['rawdev', 'bus_pci', 'bus_ifpga'] -sources = files('afu_mf_rawdev.c',
> 'n3000_afu.c', 'he_lbk.c', 'he_mem.c')
> +sources = files('afu_mf_rawdev.c', 'n3000_afu.c', 'he_lbk.c', 'he_mem.c',
> +	'he_hssi.c')
> 
>  headers = files('rte_pmd_afu.h')
> diff --git a/drivers/raw/afu_mf/rte_pmd_afu.h
> b/drivers/raw/afu_mf/rte_pmd_afu.h
> index 2f92f7e..89d866a 100644
> --- a/drivers/raw/afu_mf/rte_pmd_afu.h
> +++ b/drivers/raw/afu_mf/rte_pmd_afu.h
> @@ -111,6 +111,22 @@ struct rte_pmd_afu_he_mem_tg_cfg {
>  	uint32_t channel_mask;   /* mask of traffic generator channel */
>  };
> 
> +/**
> + * HE-HSSI AFU configuration data structure.
> + */
> +struct rte_pmd_afu_he_hssi_cfg {
> +	uint32_t port;
> +	uint32_t timeout;
> +	uint32_t num_packets;
> +	uint32_t random_length;
> +	uint32_t packet_length;
> +	uint32_t random_payload;
> +	uint32_t rnd_seed[3];
> +	uint64_t src_addr;
> +	uint64_t dest_addr;
> +	int he_loopback;
> +};
> +
>  #ifdef __cplusplus
>  }
>  #endif
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
  2022-05-27  5:37         ` [PATCH v5 1/5] drivers/raw: introduce AFU " Wei Huang
  2022-06-06  1:52           ` Zhang, Tianfei
@ 2022-06-06 15:38           ` Stephen Hemminger
  2022-06-07  1:36             ` Huang, Wei
  1 sibling, 1 reply; 57+ messages in thread
From: Stephen Hemminger @ 2022-06-06 15:38 UTC (permalink / raw)
  To: Wei Huang
  Cc: dev, thomas, nipun.gupta, hemant.agrawal, stable, rosen.xu,
	tianfei.zhang, qi.z.zhang

On Fri, 27 May 2022 01:37:00 -0400
Wei Huang <wei.huang@intel.com> wrote:

> +static inline int afu_mf_trylock(struct afu_mf_rawdev *dev)
> +{
> +	int32_t x = 0;
> +
> +	if (!dev || !dev->shared)
> +		return -ENODEV;
> +
> +	x = __atomic_load_n(&dev->shared->lock, __ATOMIC_RELAXED);
> +
> +	if ((x != 0) || (__atomic_compare_exchange_n(&dev->shared->lock, &x, 1,
> +				1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) == 0))
> +		return -EBUSY;
> +
> +	return 0;
> +}
> +
> +static inline void afu_mf_unlock(struct afu_mf_rawdev *dev)
> +{
> +	if (!dev || !dev->shared)
> +		return;
> +
> +	__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE);
> +}

What other subsystem is this shared with?
Is there a good reason it can't just use existing spinlock?

^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
  2022-06-06 15:38           ` Stephen Hemminger
@ 2022-06-07  1:36             ` Huang, Wei
  0 siblings, 0 replies; 57+ messages in thread
From: Huang, Wei @ 2022-06-07  1:36 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, thomas, nipun.gupta, hemant.agrawal, stable, Xu, Rosen,
	Zhang, Tianfei, Zhang, Qi Z



> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Monday, June 6, 2022 23:38
> To: Huang, Wei <wei.huang@intel.com>
> Cc: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> hemant.agrawal@nxp.com; stable@dpdk.org; Xu, Rosen
> <rosen.xu@intel.com>; Zhang, Tianfei <tianfei.zhang@intel.com>; Zhang, Qi
> Z <qi.z.zhang@intel.com>
> Subject: Re: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
> 
> On Fri, 27 May 2022 01:37:00 -0400
> Wei Huang <wei.huang@intel.com> wrote:
> 
> > +static inline int afu_mf_trylock(struct afu_mf_rawdev *dev) {
> > +	int32_t x = 0;
> > +
> > +	if (!dev || !dev->shared)
> > +		return -ENODEV;
> > +
> > +	x = __atomic_load_n(&dev->shared->lock, __ATOMIC_RELAXED);
> > +
> > +	if ((x != 0) || (__atomic_compare_exchange_n(&dev->shared->lock,
> &x, 1,
> > +				1, __ATOMIC_ACQUIRE,
> __ATOMIC_RELAXED) == 0))
> > +		return -EBUSY;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline void afu_mf_unlock(struct afu_mf_rawdev *dev) {
> > +	if (!dev || !dev->shared)
> > +		return;
> > +
> > +	__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE); }
> 
> What other subsystem is this shared with?
> Is there a good reason it can't just use existing spinlock?

This lock is used in multi-process scenarios; it prevents the AFU from being operated by two DPDK processes simultaneously.
I can replace it with a spinlock, thanks.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 0/5] introduce afu_mf raw device driver
  2022-06-06  1:47         ` [PATCH v5 0/5] introduce afu_mf raw device driver Zhang, Tianfei
@ 2022-06-07  2:34           ` Huang, Wei
  0 siblings, 0 replies; 57+ messages in thread
From: Huang, Wei @ 2022-06-07  2:34 UTC (permalink / raw)
  To: Zhang, Tianfei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Zhang, Tianfei <tianfei.zhang@intel.com>
> Sent: Monday, June 6, 2022 09:48
> To: Huang, Wei <wei.huang@intel.com>; dev@dpdk.org;
> thomas@monjalon.net; nipun.gupta@nxp.com; hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Subject: RE: [PATCH v5 0/5] introduce afu_mf raw device driver
> 
> 
> 
> > -----Original Message-----
> > From: Huang, Wei <wei.huang@intel.com>
> > Sent: Friday, May 27, 2022 1:37 PM
> > To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> > hemant.agrawal@nxp.com
> > Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> > <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang,
> > Wei <wei.huang@intel.com>
> > Subject: [PATCH v5 0/5] introduce afu_mf raw device driver
> 
> The title can change to: introduce AFU PMD driver of FPGA
agree
> >
> > The first patch implements the framework of the AFU raw device driver.
> 
> The first patch implements the framework of the AFU PMD driver based on
> raw device interfaces.
> 
> > The subsequent patches implement the driver of some AFUs.
> 
> Can we add the guide about how to use or test those AFU PMD drivers in
> documentation, like doc/guides/rawdevs/ifpga.rst.
I will add a guide to the file header.
> 
> >
> > Wei Huang (5):
> >   drivers/raw: introduce AFU raw device driver
> >   raw/afu_mf: add N3000 AFU driver
> >   raw/afu_mf: add HE-LBK AFU driver
> >   raw/afu_mf: add HE-MEM AFU driver
> >   raw/afu_mf: add HE-HSSI AFU driver
> >
> >  drivers/raw/afu_mf/afu_mf_rawdev.c |  440 ++++++++
> >  drivers/raw/afu_mf/afu_mf_rawdev.h |   89 ++
> >  drivers/raw/afu_mf/he_hssi.c       |  369 +++++++
> >  drivers/raw/afu_mf/he_hssi.h       |  102 ++
> >  drivers/raw/afu_mf/he_lbk.c        |  427 ++++++++
> >  drivers/raw/afu_mf/he_lbk.h        |  121 +++
> >  drivers/raw/afu_mf/he_mem.c        |  181 ++++
> >  drivers/raw/afu_mf/he_mem.h        |   40 +
> >  drivers/raw/afu_mf/meson.build     |    8 +
> >  drivers/raw/afu_mf/n3000_afu.c     | 2005
> > ++++++++++++++++++++++++++++++++++++
> >  drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
> >  drivers/raw/afu_mf/rte_pmd_afu.h   |  134 +++
> >  drivers/raw/afu_mf/version.map     |    3 +
> >  drivers/raw/meson.build            |    1 +
> >  14 files changed, 4253 insertions(+)
> >  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
> >  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
> >  create mode 100644 drivers/raw/afu_mf/he_hssi.c  create mode 100644
> > drivers/raw/afu_mf/he_hssi.h  create mode 100644
> > drivers/raw/afu_mf/he_lbk.c  create mode 100644
> > drivers/raw/afu_mf/he_lbk.h create mode 100644
> > drivers/raw/afu_mf/he_mem.c  create mode 100644
> > drivers/raw/afu_mf/he_mem.h  create mode 100644
> > drivers/raw/afu_mf/meson.build  create mode 100644
> > drivers/raw/afu_mf/n3000_afu.c  create mode 100644
> > drivers/raw/afu_mf/n3000_afu.h  create mode 100644
> > drivers/raw/afu_mf/rte_pmd_afu.h  create mode 100644
> > drivers/raw/afu_mf/version.map
> >
> > --
> > 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
  2022-06-06  1:52           ` Zhang, Tianfei
  2022-06-06  2:00             ` Zhang, Tianfei
@ 2022-06-07  2:35             ` Huang, Wei
  1 sibling, 0 replies; 57+ messages in thread
From: Huang, Wei @ 2022-06-07  2:35 UTC (permalink / raw)
  To: Zhang, Tianfei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Zhang, Tianfei <tianfei.zhang@intel.com>
> Sent: Monday, June 6, 2022 09:53
> To: Huang, Wei <wei.huang@intel.com>; dev@dpdk.org;
> thomas@monjalon.net; nipun.gupta@nxp.com; hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Subject: RE: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
> 
> 
> 
> > -----Original Message-----
> > From: Huang, Wei <wei.huang@intel.com>
> > Sent: Friday, May 27, 2022 1:37 PM
> > To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> > hemant.agrawal@nxp.com
> > Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> > <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang,
> > Wei <wei.huang@intel.com>
> > Subject: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
> >
> > Add multi-function AFU raw device driver to manage various AFU
> > (Acceleration Function Unit) in FPGA.
> > This driver implements common AFU raw device interfaces and exposes
> > them to application as standard raw device APIs.
> > Normal application can operate specified AFU as below, 1. call
> > rte_rawdev_pmd_get_named_dev() to find AFU raw device.
> > 2. call rte_rawdev_configure() to initialize AFU raw device.
> > 3. call rte_rawdev_selftest() to test function of AFU.
> >
> > Signed-off-by: Wei Huang <wei.huang@intel.com>
> > ---
> > v2: fix typo
> > ---
> > v3: fix build error in FreeBSD13-64, UB2004-32 and UB2204-32
> > ---
> > v4: fix coding style issue and build error in FreeBSD13-64
> > ---
> > v5: split patch into several patches
> > ---
> >  drivers/raw/afu_mf/afu_mf_rawdev.c | 425
> > +++++++++++++++++++++++++++++++++++++
> >  drivers/raw/afu_mf/afu_mf_rawdev.h |  71 +++++++
> >  drivers/raw/afu_mf/meson.build     |   5 +
> >  drivers/raw/afu_mf/version.map     |   3 +
> >  drivers/raw/meson.build            |   1 +
> >  5 files changed, 505 insertions(+)
> >  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
> >  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
> >  create mode 100644 drivers/raw/afu_mf/meson.build  create mode
> 100644
> > drivers/raw/afu_mf/version.map
> 
> I am thinking that just put those afu* c/h files into drivers/raw/ifpga/ folder
> is better? Because this AFU PMD driver is work on ifpga.
> And the file name of "afu_mf_rawdev.c" change to "afu_pmd_driver.c"?
> 
agree
> >
> > diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c
> > b/drivers/raw/afu_mf/afu_mf_rawdev.c
> > new file mode 100644
> > index 0000000..5be372a
> > --- /dev/null
> > +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
> > @@ -0,0 +1,425 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2022 Intel Corporation
> > + */
> > +
> > +#include <errno.h>
> > +#include <stdio.h>
> > +#include <stdint.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <unistd.h>
> > +#include <fcntl.h>
> > +#include <poll.h>
> > +#include <sys/eventfd.h>
> > +
> > +#include <rte_eal.h>
> > +#include <rte_malloc.h>
> > +#include <rte_memzone.h>
> > +#include <rte_rawdev_pmd.h>
> > +
> > +#include "afu_mf_rawdev.h"
> > +
> > +#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
> > +
> > +static const struct rte_afu_uuid afu_uuid_map[] = {
> > +	{ 0, 0 /* sentinel */ }
> > +};
> > +
> > +static struct afu_mf_drv *afu_table[] = {
> > +	NULL
> > +};
> > +
> > +static inline int afu_mf_trylock(struct afu_mf_rawdev *dev) {
> > +	int32_t x = 0;
> > +
> > +	if (!dev || !dev->shared)
> > +		return -ENODEV;
> > +
> > +	x = __atomic_load_n(&dev->shared->lock, __ATOMIC_RELAXED);
> > +
> > +	if ((x != 0) || (__atomic_compare_exchange_n(&dev->shared->lock,
> &x,
> > 1,
> > +				1, __ATOMIC_ACQUIRE,
> __ATOMIC_RELAXED)
> > == 0))
> > +		return -EBUSY;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline void afu_mf_unlock(struct afu_mf_rawdev *dev) {
> > +	if (!dev || !dev->shared)
> > +		return;
> > +
> > +	__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE); }
> > +
> > +static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
> > +	rte_rawdev_obj_t config, size_t config_size) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	if (dev->ops && dev->ops->config)
> > +		ret = (*dev->ops->config)(dev, config, config_size);
> > +
> > +	return ret;
> > +}
> > +
> > +static int afu_mf_rawdev_start(struct rte_rawdev *rawdev) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	ret = afu_mf_trylock(dev);
> > +	if (ret) {
> > +		AFU_MF_PMD_WARN("AFU is busy, please start it later");
> > +		return ret;
> > +	}
> > +
> > +	if (dev->ops && dev->ops->start)
> > +		ret = (*dev->ops->start)(dev);
> > +
> > +	afu_mf_unlock(dev);
> > +
> > +	return ret;
> > +}
> > +
> > +static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return;
> > +
> > +	ret = afu_mf_trylock(dev);
> > +	if (ret) {
> > +		AFU_MF_PMD_WARN("AFU is busy, please stop it later");
> > +		return;
> > +	}
> > +
> > +	if (dev->ops && dev->ops->stop)
> > +		ret = (*dev->ops->stop)(dev);
> > +
> > +	afu_mf_unlock(dev);
> > +}
> > +
> > +static int afu_mf_rawdev_close(struct rte_rawdev *rawdev) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	if (dev->ops && dev->ops->close)
> > +		ret = (*dev->ops->close)(dev);
> > +
> > +	return ret;
> > +}
> > +
> > +static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	ret = afu_mf_trylock(dev);
> > +	if (ret) {
> > +		AFU_MF_PMD_WARN("AFU is busy, please reset it later");
> > +		return ret;
> > +	}
> > +
> > +	if (dev->ops && dev->ops->reset)
> > +		ret = (*dev->ops->reset)(dev);
> > +
> > +	afu_mf_unlock(dev);
> > +
> > +	return ret;
> > +}
> > +
> > +static int afu_mf_rawdev_selftest(uint16_t dev_id) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	if (!rte_rawdev_pmd_is_valid_dev(dev_id))
> > +		return -ENODEV;
> > +
> > +	dev = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
> > +	if (!dev)
> > +		return -ENOENT;
> > +
> > +	ret = afu_mf_trylock(dev);
> > +	if (ret) {
> > +		AFU_MF_PMD_WARN("AFU is busy, please test it later");
> > +		return ret;
> > +	}
> > +
> > +	if (dev->ops && dev->ops->test)
> > +		ret = (*dev->ops->test)(dev);
> > +
> > +	afu_mf_unlock(dev);
> > +
> > +	return ret;
> > +}
> > +
> > +static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f) {
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		return -ENODEV;
> > +
> > +	if (dev->ops && dev->ops->dump)
> > +		ret = (*dev->ops->dump)(dev, f);
> > +
> > +	return ret;
> > +}
> > +
> > +static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
> > +	.dev_info_get = NULL,
> > +	.dev_configure = afu_mf_rawdev_configure,
> > +	.dev_start = afu_mf_rawdev_start,
> > +	.dev_stop = afu_mf_rawdev_stop,
> > +	.dev_close = afu_mf_rawdev_close,
> > +	.dev_reset = afu_mf_rawdev_reset,
> > +
> > +	.queue_def_conf = NULL,
> > +	.queue_setup = NULL,
> > +	.queue_release = NULL,
> > +	.queue_count = NULL,
> > +
> > +	.attr_get = NULL,
> > +	.attr_set = NULL,
> > +
> > +	.enqueue_bufs = NULL,
> > +	.dequeue_bufs = NULL,
> > +
> > +	.dump = afu_mf_rawdev_dump,
> > +
> > +	.xstats_get = NULL,
> > +	.xstats_get_names = NULL,
> > +	.xstats_get_by_name = NULL,
> > +	.xstats_reset = NULL,
> > +
> > +	.firmware_status_get = NULL,
> > +	.firmware_version_get = NULL,
> > +	.firmware_load = NULL,
> > +	.firmware_unload = NULL,
> > +
> > +	.dev_selftest = afu_mf_rawdev_selftest, };
> > +
> > +static int
> > +afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
> > +	int socket_id)
> > +{
> > +	const struct rte_memzone *mz;
> > +	char mz_name[RTE_MEMZONE_NAMESIZE];
> > +	struct afu_mf_shared *ptr = NULL;
> > +	int init_mz = 0;
> > +
> > +	if (!name || !data)
> > +		return -EINVAL;
> > +
> > +	/* name format is afu_?|??:??.? which is unique */
> > +	snprintf(mz_name, sizeof(mz_name), "%s", name);
> > +
> > +	mz = rte_memzone_lookup(mz_name);
> > +	if (!mz) {
> > +		mz = rte_memzone_reserve(mz_name,
> > +				sizeof(struct afu_mf_shared),
> > +				socket_id, 0);
> > +		init_mz = 1;
> > +	}
> > +
> > +	if (!mz) {
> > +		AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
> > +			mz_name);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	ptr = (struct afu_mf_shared *)mz->addr;
> > +
> > +	if (init_mz)  /* initialize memory zone on the first time */
> > +		ptr->lock = 0;
> > +
> > +	*data = ptr;
> > +
> > +	return 0;
> > +}
> > +
> > +static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev,
> > +char
> > *name,
> > +	size_t size)
> > +{
> > +	int n = 0;
> > +
> > +	if (!afu_dev || !name || !size)
> > +		return -EINVAL;
> > +
> > +	n = snprintf(name, size, "afu_%s", afu_dev->device.name);
> > +	if (n >= (int)size) {
> > +		AFU_MF_PMD_ERR("Name of AFU device is too long!");
> > +		return -ENAMETOOLONG;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id) {
> > +	struct afu_mf_drv *entry = NULL;
> > +	int i = 0;
> > +
> > +	if (!afu_id)
> > +		return NULL;
> > +
> > +	while ((entry = afu_table[i++])) {
> > +		if ((entry->uuid.uuid_low == afu_id->uuid_low) &&
> > +			(entry->uuid.uuid_high == afu_id->uuid_high))
> > +			break;
> > +	}
> > +
> > +	return entry ? entry->ops : NULL;
> > +}
> > +
> > +static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int
> > +socket_id) {
> > +	struct rte_rawdev *rawdev = NULL;
> > +	struct afu_mf_rawdev *dev = NULL;
> > +	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
> > +	int ret = 0;
> > +
> > +	if (!afu_dev)
> > +		return -EINVAL;
> > +
> > +	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
> > +	if (ret)
> > +		return ret;
> > +
> > +	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
> > +		name, socket_id);
> > +
> > +	/* Allocate device structure */
> > +	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct
> > afu_mf_rawdev),
> > +				socket_id);
> > +	if (!rawdev) {
> > +		AFU_MF_PMD_ERR("Unable to allocate raw device");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	rawdev->dev_ops = &afu_mf_rawdev_ops;
> > +	rawdev->device = &afu_dev->device;
> > +	rawdev->driver_name = afu_dev->driver->driver.name;
> > +
> > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > +	if (!dev)
> > +		goto cleanup;
> > +
> > +	dev->rawdev = rawdev;
> > +	dev->port = afu_dev->id.port;
> > +	dev->addr = afu_dev->mem_resource[0].addr;
> > +	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
> > +	if (dev->ops == NULL) {
> > +		AFU_MF_PMD_ERR("Unsupported AFU device");
> > +		goto cleanup;
> > +	}
> > +
> > +	if (dev->ops->init) {
> > +		ret = (*dev->ops->init)(dev);
> > +		if (ret) {
> > +			AFU_MF_PMD_ERR("Failed to init %s", name);
> > +			goto cleanup;
> > +		}
> > +	}
> > +
> > +	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
> > +	if (ret)
> > +		goto cleanup;
> > +
> > +	return ret;
> > +
> > +cleanup:
> > +	rte_rawdev_pmd_release(rawdev);
> > +	return ret;
> > +}
> > +
> > +static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev) {
> > +	struct rte_rawdev *rawdev = NULL;
> > +	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
> > +	int ret = 0;
> > +
> > +	if (!afu_dev)
> > +		return -EINVAL;
> > +
> > +	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
> > +	if (ret)
> > +		return ret;
> > +
> > +	AFU_MF_PMD_INFO("Destroy raw device %s", name);
> > +
> > +	rawdev = rte_rawdev_pmd_get_named_dev(name);
> > +	if (!rawdev) {
> > +		AFU_MF_PMD_ERR("Raw device %s not found", name);
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* rte_rawdev_close is called by pmd_release */
> > +	ret = rte_rawdev_pmd_release(rawdev);
> > +	if (ret)
> > +		AFU_MF_PMD_DEBUG("Device cleanup failed");
> > +
> > +	return 0;
> > +}
> > +
> > +static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev) {
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +	return afu_mf_rawdev_create(afu_dev, rte_socket_id()); }
> > +
> > +static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev) {
> > +	AFU_MF_PMD_FUNC_TRACE();
> > +	return afu_mf_rawdev_destroy(afu_dev); }
> > +
> > +static struct rte_afu_driver afu_mf_pmd_drv = {
> > +	.id_table = afu_uuid_map,
> > +	.probe = afu_mf_rawdev_probe,
> > +	.remove = afu_mf_rawdev_remove
> > +};
> > +
> > +RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME,
> afu_mf_pmd_drv);
> > +RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);
> > diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h
> > b/drivers/raw/afu_mf/afu_mf_rawdev.h
> > new file mode 100644
> > index 0000000..df6715c
> > --- /dev/null
> > +++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
> > @@ -0,0 +1,71 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2022 Intel Corporation
> > + */
> > +
> > +#ifndef __AFU_MF_RAWDEV_H__
> > +#define __AFU_MF_RAWDEV_H__
> > +
> > +#include <stdint.h>
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +
> > +#include <rte_cycles.h>
> > +#include <rte_bus_ifpga.h>
> > +#include <rte_rawdev.h>
> > +
> > +extern int afu_mf_pmd_logtype;
> > +
> > +#define AFU_MF_PMD_LOG(level, fmt, args...) \
> > +	rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n",
> \
> > +		__func__, ##args)
> > +
> > +#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG,
> ">>")
> > +
> > +#define AFU_MF_PMD_DEBUG(fmt, args...) \
> > +	AFU_MF_PMD_LOG(DEBUG, fmt, ## args)
> > +#define AFU_MF_PMD_INFO(fmt, args...) \
> > +	AFU_MF_PMD_LOG(INFO, fmt, ## args)
> > +#define AFU_MF_PMD_ERR(fmt, args...) \
> > +	AFU_MF_PMD_LOG(ERR, fmt, ## args)
> > +#define AFU_MF_PMD_WARN(fmt, args...) \
> > +	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
> > +
> > +struct afu_mf_rawdev;
> > +
> > +struct afu_mf_ops {
> > +	int (*init)(struct afu_mf_rawdev *dev);
> > +	int (*config)(struct afu_mf_rawdev *dev, void *config,
> > +		size_t config_size);
> > +	int (*start)(struct afu_mf_rawdev *dev);
> > +	int (*stop)(struct afu_mf_rawdev *dev);
> > +	int (*test)(struct afu_mf_rawdev *dev);
> > +	int (*close)(struct afu_mf_rawdev *dev);
> > +	int (*reset)(struct afu_mf_rawdev *dev);
> > +	int (*dump)(struct afu_mf_rawdev *dev, FILE *f); };
> > +
> > +struct afu_mf_drv {
> > +	struct rte_afu_uuid uuid;
> > +	struct afu_mf_ops *ops;
> > +};
> > +
> > +struct afu_mf_shared {
> > +	int32_t lock;
> > +};
> > +
> > +struct afu_mf_rawdev {
> > +	struct rte_rawdev *rawdev;  /* point to parent raw device */
> > +	struct afu_mf_shared *shared;  /* shared data for multi-process */
> > +	struct afu_mf_ops *ops;  /* device operation functions */
> > +	int port;  /* index of port the AFU attached */
> > +	void *addr;  /* base address of AFU registers */
> > +	void *priv;  /* private driver data */ };
> > +
> > +static inline struct afu_mf_rawdev *
> > +afu_mf_rawdev_get_priv(const struct rte_rawdev *rawdev) {
> > +	return rawdev ? (struct afu_mf_rawdev *)rawdev->dev_private :
> NULL;
> > +}
> > +
> > +#endif /* __AFU_MF_RAWDEV_H__ */
> > diff --git a/drivers/raw/afu_mf/meson.build
> > b/drivers/raw/afu_mf/meson.build new file mode 100644 index
> > 0000000..80526a2
> > --- /dev/null
> > +++ b/drivers/raw/afu_mf/meson.build
> > @@ -0,0 +1,5 @@
> > +# SPDX-License-Identifier: BSD-3-Clause # Copyright 2022 Intel
> > +Corporation
> > +
> > +deps += ['rawdev', 'bus_pci', 'bus_ifpga'] sources =
> > +files('afu_mf_rawdev.c')
> > diff --git a/drivers/raw/afu_mf/version.map
> > b/drivers/raw/afu_mf/version.map new file mode 100644 index
> > 0000000..c2e0723
> > --- /dev/null
> > +++ b/drivers/raw/afu_mf/version.map
> > @@ -0,0 +1,3 @@
> > +DPDK_22 {
> > +	local: *;
> > +};
> > diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build index
> > 05e7de1..c3627f7 100644
> > --- a/drivers/raw/meson.build
> > +++ b/drivers/raw/meson.build
> > @@ -6,6 +6,7 @@ if is_windows
> >  endif
> >
> >  drivers = [
> > +        'afu_mf',
> >          'cnxk_bphy',
> >          'cnxk_gpio',
> >          'dpaa2_cmdif',
> > --
> > 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
  2022-06-06  2:00             ` Zhang, Tianfei
@ 2022-06-07  2:36               ` Huang, Wei
  0 siblings, 0 replies; 57+ messages in thread
From: Huang, Wei @ 2022-06-07  2:36 UTC (permalink / raw)
  To: Zhang, Tianfei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Zhang, Tianfei <tianfei.zhang@intel.com>
> Sent: Monday, June 6, 2022 10:00
> To: Zhang, Tianfei <tianfei.zhang@intel.com>; Huang, Wei
> <wei.huang@intel.com>; dev@dpdk.org; thomas@monjalon.net;
> nipun.gupta@nxp.com; hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Subject: RE: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
> 
> 
> 
> > -----Original Message-----
> > From: Zhang, Tianfei <tianfei.zhang@intel.com>
> > Sent: Monday, June 6, 2022 9:53 AM
> > To: Huang, Wei <wei.huang@intel.com>; dev@dpdk.org;
> > thomas@monjalon.net; nipun.gupta@nxp.com;
> hemant.agrawal@nxp.com
> > Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Qi Z
> > <qi.z.zhang@intel.com>
> > Subject: RE: [PATCH v5 1/5] drivers/raw: introduce AFU raw device
> > driver
> >
> >
> >
> > > -----Original Message-----
> > > From: Huang, Wei <wei.huang@intel.com>
> > > Sent: Friday, May 27, 2022 1:37 PM
> > > To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> > > hemant.agrawal@nxp.com
> > > Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> > > <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
> > > Huang, Wei <wei.huang@intel.com>
> > > Subject: [PATCH v5 1/5] drivers/raw: introduce AFU raw device driver
> > >
> > > > Add a multi-function AFU raw device driver to manage various AFUs
> > > > (Acceleration Function Units) in an FPGA.
> > > > This driver implements common AFU raw device interfaces and exposes
> > > > them to applications as standard raw device APIs.
> > > > A normal application can operate a specified AFU as follows:
> > > > 1. call rte_rawdev_pmd_get_named_dev() to find the AFU raw device.
> > > > 2. call rte_rawdev_configure() to initialize the AFU raw device.
> > > > 3. call rte_rawdev_selftest() to test the function of the AFU.
> > >
> > > Signed-off-by: Wei Huang <wei.huang@intel.com>
> > > ---
> > > v2: fix typo
> > > ---
> > > v3: fix build error in FreeBSD13-64, UB2004-32 and UB2204-32
> > > ---
> > > v4: fix coding style issue and build error in FreeBSD13-64
> > > ---
> > > v5: split patch into several patches
> > > ---
> > >  drivers/raw/afu_mf/afu_mf_rawdev.c | 425
> > > +++++++++++++++++++++++++++++++++++++
> > >  drivers/raw/afu_mf/afu_mf_rawdev.h |  71 +++++++
> > >  drivers/raw/afu_mf/meson.build     |   5 +
> > >  drivers/raw/afu_mf/version.map     |   3 +
> > >  drivers/raw/meson.build            |   1 +
> > >  5 files changed, 505 insertions(+)
> > >  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.c
> > >  create mode 100644 drivers/raw/afu_mf/afu_mf_rawdev.h
> > >  create mode 100644 drivers/raw/afu_mf/meson.build  create mode
> > > 100644 drivers/raw/afu_mf/version.map
> >
> > > I think it would be better to put these afu* .c/.h files into the
> > > drivers/raw/ifpga/ folder, because this AFU PMD driver works on ifpga.
> > > Also, could the file name "afu_mf_rawdev.c" be changed to "afu_pmd_driver.c"?
> 
> > Hmm, or could the file name "afu_mf_rawdev.c" be changed to "afu_pmd_core.c"?
> > This file holds the common code of the AFU PMD driver, and the specific
> > functions are in separate C files, like afu_pmd_n3000.c.
> 
Agreed.
> >
> > >
> > > diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c
> > > b/drivers/raw/afu_mf/afu_mf_rawdev.c
> > > new file mode 100644
> > > index 0000000..5be372a
> > > --- /dev/null
> > > +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
> > > @@ -0,0 +1,425 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright 2022 Intel Corporation  */
> > > +
> > > +#include <errno.h>
> > > +#include <stdio.h>
> > > +#include <stdint.h>
> > > +#include <stdlib.h>
> > > +#include <string.h>
> > > +#include <unistd.h>
> > > +#include <fcntl.h>
> > > +#include <poll.h>
> > > +#include <sys/eventfd.h>
> > > +
> > > +#include <rte_eal.h>
> > > +#include <rte_malloc.h>
> > > +#include <rte_memzone.h>
> > > +#include <rte_rawdev_pmd.h>
> > > +
> > > +#include "afu_mf_rawdev.h"
> > > +
> > > +#define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
> > > +
> > > +static const struct rte_afu_uuid afu_uuid_map[] = {
> > > +	{ 0, 0 /* sentinel */ }
> > > +};
> > > +
> > > +static struct afu_mf_drv *afu_table[] = {
> > > +	NULL
> > > +};
> > > +
> > > +static inline int afu_mf_trylock(struct afu_mf_rawdev *dev) {
> > > +	int32_t x = 0;
> > > +
> > > +	if (!dev || !dev->shared)
> > > +		return -ENODEV;
> > > +
> > > +	x = __atomic_load_n(&dev->shared->lock, __ATOMIC_RELAXED);
> > > +
> > > +	if ((x != 0) || (__atomic_compare_exchange_n(&dev->shared->lock,
> > > +&x,
> > > 1,
> > > +				1, __ATOMIC_ACQUIRE,
> __ATOMIC_RELAXED)
> > > == 0))
> > > +		return -EBUSY;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static inline void afu_mf_unlock(struct afu_mf_rawdev *dev) {
> > > +	if (!dev || !dev->shared)
> > > +		return;
> > > +
> > > +	__atomic_store_n(&dev->shared->lock, 0, __ATOMIC_RELEASE); }
> > > +
> > > +static int afu_mf_rawdev_configure(const struct rte_rawdev *rawdev,
> > > +	rte_rawdev_obj_t config, size_t config_size) {
> > > +	struct afu_mf_rawdev *dev = NULL;
> > > +	int ret = 0;
> > > +
> > > +	AFU_MF_PMD_FUNC_TRACE();
> > > +
> > > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > > +	if (!dev)
> > > +		return -ENODEV;
> > > +
> > > +	if (dev->ops && dev->ops->config)
> > > +		ret = (*dev->ops->config)(dev, config, config_size);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static int afu_mf_rawdev_start(struct rte_rawdev *rawdev) {
> > > +	struct afu_mf_rawdev *dev = NULL;
> > > +	int ret = 0;
> > > +
> > > +	AFU_MF_PMD_FUNC_TRACE();
> > > +
> > > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > > +	if (!dev)
> > > +		return -ENODEV;
> > > +
> > > +	ret = afu_mf_trylock(dev);
> > > +	if (ret) {
> > > +		AFU_MF_PMD_WARN("AFU is busy, please start it later");
> > > +		return ret;
> > > +	}
> > > +
> > > +	if (dev->ops && dev->ops->start)
> > > +		ret = (*dev->ops->start)(dev);
> > > +
> > > +	afu_mf_unlock(dev);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static void afu_mf_rawdev_stop(struct rte_rawdev *rawdev) {
> > > +	struct afu_mf_rawdev *dev = NULL;
> > > +	int ret = 0;
> > > +
> > > +	AFU_MF_PMD_FUNC_TRACE();
> > > +
> > > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > > +	if (!dev)
> > > +		return;
> > > +
> > > +	ret = afu_mf_trylock(dev);
> > > +	if (ret) {
> > > +		AFU_MF_PMD_WARN("AFU is busy, please stop it later");
> > > +		return;
> > > +	}
> > > +
> > > +	if (dev->ops && dev->ops->stop)
> > > +		ret = (*dev->ops->stop)(dev);
> > > +
> > > +	afu_mf_unlock(dev);
> > > +}
> > > +
> > > +static int afu_mf_rawdev_close(struct rte_rawdev *rawdev) {
> > > +	struct afu_mf_rawdev *dev = NULL;
> > > +	int ret = 0;
> > > +
> > > +	AFU_MF_PMD_FUNC_TRACE();
> > > +
> > > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > > +	if (!dev)
> > > +		return -ENODEV;
> > > +
> > > +	if (dev->ops && dev->ops->close)
> > > +		ret = (*dev->ops->close)(dev);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static int afu_mf_rawdev_reset(struct rte_rawdev *rawdev) {
> > > +	struct afu_mf_rawdev *dev = NULL;
> > > +	int ret = 0;
> > > +
> > > +	AFU_MF_PMD_FUNC_TRACE();
> > > +
> > > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > > +	if (!dev)
> > > +		return -ENODEV;
> > > +
> > > +	ret = afu_mf_trylock(dev);
> > > +	if (ret) {
> > > +		AFU_MF_PMD_WARN("AFU is busy, please reset it later");
> > > +		return ret;
> > > +	}
> > > +
> > > +	if (dev->ops && dev->ops->reset)
> > > +		ret = (*dev->ops->reset)(dev);
> > > +
> > > +	afu_mf_unlock(dev);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static int afu_mf_rawdev_selftest(uint16_t dev_id) {
> > > +	struct afu_mf_rawdev *dev = NULL;
> > > +	int ret = 0;
> > > +
> > > +	AFU_MF_PMD_FUNC_TRACE();
> > > +
> > > +	if (!rte_rawdev_pmd_is_valid_dev(dev_id))
> > > +		return -ENODEV;
> > > +
> > > +	dev = afu_mf_rawdev_get_priv(&rte_rawdevs[dev_id]);
> > > +	if (!dev)
> > > +		return -ENOENT;
> > > +
> > > +	ret = afu_mf_trylock(dev);
> > > +	if (ret) {
> > > +		AFU_MF_PMD_WARN("AFU is busy, please test it later");
> > > +		return ret;
> > > +	}
> > > +
> > > +	if (dev->ops && dev->ops->test)
> > > +		ret = (*dev->ops->test)(dev);
> > > +
> > > +	afu_mf_unlock(dev);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static int afu_mf_rawdev_dump(struct rte_rawdev *rawdev, FILE *f) {
> > > +	struct afu_mf_rawdev *dev = NULL;
> > > +	int ret = 0;
> > > +
> > > +	AFU_MF_PMD_FUNC_TRACE();
> > > +
> > > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > > +	if (!dev)
> > > +		return -ENODEV;
> > > +
> > > +	if (dev->ops && dev->ops->dump)
> > > +		ret = (*dev->ops->dump)(dev, f);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static const struct rte_rawdev_ops afu_mf_rawdev_ops = {
> > > +	.dev_info_get = NULL,
> > > +	.dev_configure = afu_mf_rawdev_configure,
> > > +	.dev_start = afu_mf_rawdev_start,
> > > +	.dev_stop = afu_mf_rawdev_stop,
> > > +	.dev_close = afu_mf_rawdev_close,
> > > +	.dev_reset = afu_mf_rawdev_reset,
> > > +
> > > +	.queue_def_conf = NULL,
> > > +	.queue_setup = NULL,
> > > +	.queue_release = NULL,
> > > +	.queue_count = NULL,
> > > +
> > > +	.attr_get = NULL,
> > > +	.attr_set = NULL,
> > > +
> > > +	.enqueue_bufs = NULL,
> > > +	.dequeue_bufs = NULL,
> > > +
> > > +	.dump = afu_mf_rawdev_dump,
> > > +
> > > +	.xstats_get = NULL,
> > > +	.xstats_get_names = NULL,
> > > +	.xstats_get_by_name = NULL,
> > > +	.xstats_reset = NULL,
> > > +
> > > +	.firmware_status_get = NULL,
> > > +	.firmware_version_get = NULL,
> > > +	.firmware_load = NULL,
> > > +	.firmware_unload = NULL,
> > > +
> > > +	.dev_selftest = afu_mf_rawdev_selftest, };
> > > +
> > > +static int
> > > +afu_mf_shared_alloc(const char *name, struct afu_mf_shared **data,
> > > +	int socket_id)
> > > +{
> > > +	const struct rte_memzone *mz;
> > > +	char mz_name[RTE_MEMZONE_NAMESIZE];
> > > +	struct afu_mf_shared *ptr = NULL;
> > > +	int init_mz = 0;
> > > +
> > > +	if (!name || !data)
> > > +		return -EINVAL;
> > > +
> > > +	/* name format is afu_?|??:??.? which is unique */
> > > +	snprintf(mz_name, sizeof(mz_name), "%s", name);
> > > +
> > > +	mz = rte_memzone_lookup(mz_name);
> > > +	if (!mz) {
> > > +		mz = rte_memzone_reserve(mz_name,
> > > +				sizeof(struct afu_mf_shared),
> > > +				socket_id, 0);
> > > +		init_mz = 1;
> > > +	}
> > > +
> > > +	if (!mz) {
> > > +		AFU_MF_PMD_ERR("Allocate memory zone %s failed!",
> > > +			mz_name);
> > > +		return -ENOMEM;
> > > +	}
> > > +
> > > +	ptr = (struct afu_mf_shared *)mz->addr;
> > > +
> > > +	if (init_mz)  /* initialize memory zone on the first time */
> > > +		ptr->lock = 0;
> > > +
> > > +	*data = ptr;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int afu_mf_rawdev_name_get(struct rte_afu_device *afu_dev,
> > > +char
> > > *name,
> > > +	size_t size)
> > > +{
> > > +	int n = 0;
> > > +
> > > +	if (!afu_dev || !name || !size)
> > > +		return -EINVAL;
> > > +
> > > +	n = snprintf(name, size, "afu_%s", afu_dev->device.name);
> > > +	if (n >= (int)size) {
> > > +		AFU_MF_PMD_ERR("Name of AFU device is too long!");
> > > +		return -ENAMETOOLONG;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static struct afu_mf_ops *afu_mf_ops_get(struct rte_afu_uuid *afu_id)
> {
> > > +	struct afu_mf_drv *entry = NULL;
> > > +	int i = 0;
> > > +
> > > +	if (!afu_id)
> > > +		return NULL;
> > > +
> > > +	while ((entry = afu_table[i++])) {
> > > +		if ((entry->uuid.uuid_low == afu_id->uuid_low) &&
> > > +			(entry->uuid.uuid_high == afu_id->uuid_high))
> > > +			break;
> > > +	}
> > > +
> > > +	return entry ? entry->ops : NULL;
> > > +}
> > > +
> > > +static int afu_mf_rawdev_create(struct rte_afu_device *afu_dev, int
> > > +socket_id) {
> > > +	struct rte_rawdev *rawdev = NULL;
> > > +	struct afu_mf_rawdev *dev = NULL;
> > > +	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
> > > +	int ret = 0;
> > > +
> > > +	if (!afu_dev)
> > > +		return -EINVAL;
> > > +
> > > +	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	AFU_MF_PMD_INFO("Create raw device %s on NUMA node %d",
> > > +		name, socket_id);
> > > +
> > > +	/* Allocate device structure */
> > > +	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct
> > > afu_mf_rawdev),
> > > +				socket_id);
> > > +	if (!rawdev) {
> > > +		AFU_MF_PMD_ERR("Unable to allocate raw device");
> > > +		return -ENOMEM;
> > > +	}
> > > +
> > > +	rawdev->dev_ops = &afu_mf_rawdev_ops;
> > > +	rawdev->device = &afu_dev->device;
> > > +	rawdev->driver_name = afu_dev->driver->driver.name;
> > > +
> > > +	dev = afu_mf_rawdev_get_priv(rawdev);
> > > +	if (!dev)
> > > +		goto cleanup;
> > > +
> > > +	dev->rawdev = rawdev;
> > > +	dev->port = afu_dev->id.port;
> > > +	dev->addr = afu_dev->mem_resource[0].addr;
> > > +	dev->ops = afu_mf_ops_get(&afu_dev->id.uuid);
> > > +	if (dev->ops == NULL) {
> > > +		AFU_MF_PMD_ERR("Unsupported AFU device");
> > > +		goto cleanup;
> > > +	}
> > > +
> > > +	if (dev->ops->init) {
> > > +		ret = (*dev->ops->init)(dev);
> > > +		if (ret) {
> > > +			AFU_MF_PMD_ERR("Failed to init %s", name);
> > > +			goto cleanup;
> > > +		}
> > > +	}
> > > +
> > > +	ret = afu_mf_shared_alloc(name, &dev->shared, socket_id);
> > > +	if (ret)
> > > +		goto cleanup;
> > > +
> > > +	return ret;
> > > +
> > > +cleanup:
> > > +	rte_rawdev_pmd_release(rawdev);
> > > +	return ret;
> > > +}
> > > +
> > > +static int afu_mf_rawdev_destroy(struct rte_afu_device *afu_dev) {
> > > +	struct rte_rawdev *rawdev = NULL;
> > > +	char name[RTE_RAWDEV_NAME_MAX_LEN] = {0};
> > > +	int ret = 0;
> > > +
> > > +	if (!afu_dev)
> > > +		return -EINVAL;
> > > +
> > > +	ret = afu_mf_rawdev_name_get(afu_dev, name, sizeof(name));
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	AFU_MF_PMD_INFO("Destroy raw device %s", name);
> > > +
> > > +	rawdev = rte_rawdev_pmd_get_named_dev(name);
> > > +	if (!rawdev) {
> > > +		AFU_MF_PMD_ERR("Raw device %s not found", name);
> > > +		return -EINVAL;
> > > +	}
> > > +
> > > +	/* rte_rawdev_close is called by pmd_release */
> > > +	ret = rte_rawdev_pmd_release(rawdev);
> > > +	if (ret)
> > > +		AFU_MF_PMD_DEBUG("Device cleanup failed");
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int afu_mf_rawdev_probe(struct rte_afu_device *afu_dev) {
> > > +	AFU_MF_PMD_FUNC_TRACE();
> > > +	return afu_mf_rawdev_create(afu_dev, rte_socket_id()); }
> > > +
> > > +static int afu_mf_rawdev_remove(struct rte_afu_device *afu_dev) {
> > > +	AFU_MF_PMD_FUNC_TRACE();
> > > +	return afu_mf_rawdev_destroy(afu_dev); }
> > > +
> > > +static struct rte_afu_driver afu_mf_pmd_drv = {
> > > +	.id_table = afu_uuid_map,
> > > +	.probe = afu_mf_rawdev_probe,
> > > +	.remove = afu_mf_rawdev_remove
> > > +};
> > > +
> > > +RTE_PMD_REGISTER_AFU(AFU_MF_PMD_RAWDEV_NAME,
> > afu_mf_pmd_drv);
> > > +RTE_LOG_REGISTER_DEFAULT(afu_mf_pmd_logtype, NOTICE);
> > > diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h
> > > b/drivers/raw/afu_mf/afu_mf_rawdev.h
> > > new file mode 100644
> > > index 0000000..df6715c
> > > --- /dev/null
> > > +++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
> > > @@ -0,0 +1,71 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright 2022 Intel Corporation  */
> > > +
> > > +#ifndef __AFU_MF_RAWDEV_H__
> > > +#define __AFU_MF_RAWDEV_H__
> > > +
> > > +#include <stdint.h>
> > > +#include <stdio.h>
> > > +#include <unistd.h>
> > > +
> > > +#include <rte_cycles.h>
> > > +#include <rte_bus_ifpga.h>
> > > +#include <rte_rawdev.h>
> > > +
> > > +extern int afu_mf_pmd_logtype;
> > > +
> > > +#define AFU_MF_PMD_LOG(level, fmt, args...) \
> > > +	rte_log(RTE_LOG_ ## level, afu_mf_pmd_logtype, "%s(): " fmt "\n",
> \
> > > +		__func__, ##args)
> > > +
> > > +#define AFU_MF_PMD_FUNC_TRACE() AFU_MF_PMD_LOG(DEBUG,
> ">>")
> > > +
> > > +#define AFU_MF_PMD_DEBUG(fmt, args...) \
> > > +	AFU_MF_PMD_LOG(DEBUG, fmt, ## args) #define
> AFU_MF_PMD_INFO(fmt,
> > > +args...) \
> > > +	AFU_MF_PMD_LOG(INFO, fmt, ## args) #define
> AFU_MF_PMD_ERR(fmt,
> > > +args...) \
> > > +	AFU_MF_PMD_LOG(ERR, fmt, ## args)
> > > +#define AFU_MF_PMD_WARN(fmt, args...) \
> > > +	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
> > > +
> > > +struct afu_mf_rawdev;
> > > +
> > > +struct afu_mf_ops {
> > > +	int (*init)(struct afu_mf_rawdev *dev);
> > > +	int (*config)(struct afu_mf_rawdev *dev, void *config,
> > > +		size_t config_size);
> > > +	int (*start)(struct afu_mf_rawdev *dev);
> > > +	int (*stop)(struct afu_mf_rawdev *dev);
> > > +	int (*test)(struct afu_mf_rawdev *dev);
> > > +	int (*close)(struct afu_mf_rawdev *dev);
> > > +	int (*reset)(struct afu_mf_rawdev *dev);
> > > +	int (*dump)(struct afu_mf_rawdev *dev, FILE *f); };
> > > +
> > > +struct afu_mf_drv {
> > > +	struct rte_afu_uuid uuid;
> > > +	struct afu_mf_ops *ops;
> > > +};
> > > +
> > > +struct afu_mf_shared {
> > > +	int32_t lock;
> > > +};
> > > +
> > > +struct afu_mf_rawdev {
> > > +	struct rte_rawdev *rawdev;  /* point to parent raw device */
> > > +	struct afu_mf_shared *shared;  /* shared data for multi-process */
> > > +	struct afu_mf_ops *ops;  /* device operation functions */
> > > +	int port;  /* index of port the AFU attached */
> > > +	void *addr;  /* base address of AFU registers */
> > > +	void *priv;  /* private driver data */ };
> > > +
> > > +static inline struct afu_mf_rawdev * afu_mf_rawdev_get_priv(const
> > > +struct rte_rawdev *rawdev) {
> > > +	return rawdev ? (struct afu_mf_rawdev *)rawdev->dev_private :
> > > +NULL; }
> > > +
> > > +#endif /* __AFU_MF_RAWDEV_H__ */
> > > diff --git a/drivers/raw/afu_mf/meson.build
> > > b/drivers/raw/afu_mf/meson.build new file mode 100644 index
> > > 0000000..80526a2
> > > --- /dev/null
> > > +++ b/drivers/raw/afu_mf/meson.build
> > > @@ -0,0 +1,5 @@
> > > +# SPDX-License-Identifier: BSD-3-Clause # Copyright 2022 Intel
> > > +Corporation
> > > +
> > > +deps += ['rawdev', 'bus_pci', 'bus_ifpga'] sources =
> > > +files('afu_mf_rawdev.c')
> > > diff --git a/drivers/raw/afu_mf/version.map
> > > b/drivers/raw/afu_mf/version.map new file mode 100644 index
> > > 0000000..c2e0723
> > > --- /dev/null
> > > +++ b/drivers/raw/afu_mf/version.map
> > > @@ -0,0 +1,3 @@
> > > +DPDK_22 {
> > > +	local: *;
> > > +};
> > > diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build index
> > > 05e7de1..c3627f7 100644
> > > --- a/drivers/raw/meson.build
> > > +++ b/drivers/raw/meson.build
> > > @@ -6,6 +6,7 @@ if is_windows
> > >  endif
> > >
> > >  drivers = [
> > > +        'afu_mf',
> > >          'cnxk_bphy',
> > >          'cnxk_gpio',
> > >          'dpaa2_cmdif',
> > > --
> > > 1.8.3.1


^ permalink raw reply	[flat|nested] 57+ messages in thread

* RE: [PATCH v5 2/5] raw/afu_mf: add N3000 AFU driver
  2022-06-06  1:38           ` Zhang, Tianfei
@ 2022-06-07  2:40             ` Huang, Wei
  0 siblings, 0 replies; 57+ messages in thread
From: Huang, Wei @ 2022-06-07  2:40 UTC (permalink / raw)
  To: Zhang, Tianfei, dev, thomas, nipun.gupta, hemant.agrawal
  Cc: stable, Xu, Rosen, Zhang, Qi Z



> -----Original Message-----
> From: Zhang, Tianfei <tianfei.zhang@intel.com>
> Sent: Monday, June 6, 2022 09:39
> To: Huang, Wei <wei.huang@intel.com>; dev@dpdk.org;
> thomas@monjalon.net; nipun.gupta@nxp.com; hemant.agrawal@nxp.com
> Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Subject: RE: [PATCH v5 2/5] raw/afu_mf: add N3000 AFU driver
> 
> 
> 
> > -----Original Message-----
> > From: Huang, Wei <wei.huang@intel.com>
> > Sent: Friday, May 27, 2022 1:37 PM
> > To: dev@dpdk.org; thomas@monjalon.net; nipun.gupta@nxp.com;
> > hemant.agrawal@nxp.com
> > Cc: stable@dpdk.org; Xu, Rosen <rosen.xu@intel.com>; Zhang, Tianfei
> > <tianfei.zhang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Huang,
> Wei
> > <wei.huang@intel.com>
> > Subject: [PATCH v5 2/5] raw/afu_mf: add N3000 AFU driver
> >
> > N3000 AFU includes NLB0 and DMA modules, NLB0 is used to test PCI bus
> > and DMA is used to test local memory.
> > This driver initialize the modules and report test result.
> >
> > Signed-off-by: Wei Huang <wei.huang@intel.com>
> > ---
> >  drivers/raw/afu_mf/afu_mf_rawdev.c |    4 +
> >  drivers/raw/afu_mf/afu_mf_rawdev.h |   18 +
> >  drivers/raw/afu_mf/meson.build     |    4 +-
> >  drivers/raw/afu_mf/n3000_afu.c     | 2005
> > ++++++++++++++++++++++++++++++++++++
> >  drivers/raw/afu_mf/n3000_afu.h     |  333 ++++++
> >  drivers/raw/afu_mf/rte_pmd_afu.h   |   97 ++
> >  6 files changed, 2460 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/raw/afu_mf/n3000_afu.c
> >  create mode 100644 drivers/raw/afu_mf/n3000_afu.h
> >  create mode 100644 drivers/raw/afu_mf/rte_pmd_afu.h
> >
> > diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.c
> > b/drivers/raw/afu_mf/afu_mf_rawdev.c
> > index 5be372a..7c18f3b 100644
> > --- a/drivers/raw/afu_mf/afu_mf_rawdev.c
> > +++ b/drivers/raw/afu_mf/afu_mf_rawdev.c
> > @@ -17,15 +17,19 @@
> >  #include <rte_memzone.h>
> >  #include <rte_rawdev_pmd.h>
> >
> > +#include "rte_pmd_afu.h"
> >  #include "afu_mf_rawdev.h"
> > +#include "n3000_afu.h"
> >
> >  #define AFU_MF_PMD_RAWDEV_NAME rawdev_afu_mf
> >
> >  static const struct rte_afu_uuid afu_uuid_map[] = {
> > +	{ N3000_AFU_UUID_L, N3000_AFU_UUID_H },
> >  	{ 0, 0 /* sentinel */ }
> >  };
> >
> >  static struct afu_mf_drv *afu_table[] = {
> > +	&n3000_afu_drv,
> >  	NULL
> >  };
> >
> > diff --git a/drivers/raw/afu_mf/afu_mf_rawdev.h
> > b/drivers/raw/afu_mf/afu_mf_rawdev.h
> > index df6715c..5a66f6c 100644
> > --- a/drivers/raw/afu_mf/afu_mf_rawdev.h
> > +++ b/drivers/raw/afu_mf/afu_mf_rawdev.h
> > @@ -30,6 +30,24 @@
> >  #define AFU_MF_PMD_WARN(fmt, args...) \
> >  	AFU_MF_PMD_LOG(WARNING, fmt, ## args)
> >
> > +#define CLS_TO_SIZE(n)  ((n) << 6)  /* get size of n cache lines */
> > +#define SIZE_TO_CLS(s)  ((s) >> 6)  /* convert size to number of cache
> lines */
> > +#define MHZ(f)  ((f) * 1000000)
> > +
> > +#define dsm_poll_timeout(addr, val, cond, invl, timeout) \
> > +({                                                       \
> > +	uint64_t __wait = 0;                                 \
> > +	uint64_t __invl = (invl);                            \
> > +	uint64_t __timeout = (timeout);                      \
> > +	for (; __wait <= __timeout; __wait += __invl) {      \
> > +		(val) = *(addr);                                 \
> > +		if (cond)                                        \
> > +			break;                                       \
> > +		rte_delay_ms(__invl);                            \
> > +	}                                                    \
> > +	(cond) ? 0 : 1;                                      \
> > +})
> 
> Does DSM mean DMA?
DSM means 'DMA Status Memory'
> 
> > +
> >  struct afu_mf_rawdev;
> >
> >  struct afu_mf_ops {
> > diff --git a/drivers/raw/afu_mf/meson.build
> b/drivers/raw/afu_mf/meson.build
> > index 80526a2..8a989e3 100644
> > --- a/drivers/raw/afu_mf/meson.build
> > +++ b/drivers/raw/afu_mf/meson.build
> > @@ -2,4 +2,6 @@
> >  # Copyright 2022 Intel Corporation
> >
> >  deps += ['rawdev', 'bus_pci', 'bus_ifpga']
> > -sources = files('afu_mf_rawdev.c')
> > +sources = files('afu_mf_rawdev.c', 'n3000_afu.c')
> > +
> > +headers = files('rte_pmd_afu.h')
> > diff --git a/drivers/raw/afu_mf/n3000_afu.c
> b/drivers/raw/afu_mf/n3000_afu.c
> > new file mode 100644
> > index 0000000..19d7c54
> > --- /dev/null
> > +++ b/drivers/raw/afu_mf/n3000_afu.c
> > @@ -0,0 +1,2005 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2022 Intel Corporation
> > + */
> > +
> > +#include <errno.h>
> > +#include <stdio.h>
> > +#include <stdint.h>
> > +#include <stdlib.h>
> > +#include <inttypes.h>
> > +#include <unistd.h>
> > +#include <fcntl.h>
> > +#include <poll.h>
> > +#include <sys/eventfd.h>
> > +#include <sys/ioctl.h>
> > +
> > +#include <rte_eal.h>
> > +#include <rte_malloc.h>
> > +#include <rte_memcpy.h>
> > +#include <rte_io.h>
> > +#include <rte_vfio.h>
> > +#include <rte_bus_pci.h>
> > +#include <rte_bus_ifpga.h>
> > +#include <rte_rawdev.h>
> > +
> > +#include "afu_mf_rawdev.h"
> > +#include "n3000_afu.h"
> > +
> > +static int nlb_afu_config(struct afu_mf_rawdev *dev)
> > +{
> > +	struct n3000_afu_priv *priv = NULL;
> > +	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
> > +	struct nlb_csr_cfg v;
> > +
> > +	if (!dev)
> > +		return -EINVAL;
> > +
> > +	if (!dev->priv)
> > +		return -ENOENT;
> > +
> > +	priv = (struct n3000_afu_priv *)dev->priv;
> > +	cfg = &priv->nlb_cfg;
> > +
> > +	v.csr = 0;
> > +
> > +	if (cfg->cont)
> > +		v.cont = 1;
> > +
> > +	if (cfg->cache_policy == NLB_WRPUSH_I)
> > +		v.wrpush_i = 1;
> > +	else
> > +		v.wrthru_en = cfg->cache_policy;
> > +
> > +	if (cfg->cache_hint == NLB_RDLINE_MIXED)
> > +		v.rdsel = 3;
> > +	else
> > +		v.rdsel = cfg->cache_hint;
> > +
> > +	v.mode = cfg->mode;
> > +	v.chsel = cfg->read_vc;
> > +	v.wr_chsel = cfg->write_vc;
> > +	v.wrfence_chsel = cfg->wrfence_vc;
> > +	v.wrthru_en = cfg->cache_policy;
> > +	v.multicl_len = cfg->multi_cl - 1;
> > +
> > +	AFU_MF_PMD_DEBUG("cfg: 0x%08x", v.csr);
> > +	rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
> > +
> > +	return 0;
> > +}
> > +
> > +static void nlb_afu_report(struct afu_mf_rawdev *dev, uint32_t cl)
> > +{
> > +	struct n3000_afu_priv *priv = NULL;
> > +	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
> > +	struct nlb_dsm_status *stat = NULL;
> > +	uint64_t ticks = 0;
> > +	double num, rd_bw, wr_bw;
> > +
> > +	if (!dev || !dev->priv)
> > +		return;
> > +
> > +	priv = (struct n3000_afu_priv *)dev->priv;
> > +
> > +	cfg = &priv->nlb_cfg;
> > +	stat = priv->nlb_ctx.status_ptr;
> > +
> > +	if (cfg->cont)
> > +		ticks = stat->num_clocks - stat->start_overhead;
> > +	else
> > +		ticks = stat->num_clocks -
> > +			(stat->start_overhead + stat->end_overhead);
> > +
> > +	if (cfg->freq_mhz == 0)
> > +		cfg->freq_mhz = 200;
> > +
> > +	num = (double)stat->num_reads;
> > +	rd_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
> > +	num = (double)stat->num_writes;
> > +	wr_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
> > +
> > +	printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
> > +		"Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
> > +	printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n",
> > +		cl, stat->num_reads, stat->num_writes, ticks,
> > +		rd_bw / 1e9, wr_bw / 1e9);
> > +}
> > +
> > +static int nlb_afu_test(struct afu_mf_rawdev *dev)
> > +{
> > +	struct n3000_afu_priv *priv = NULL;
> > +	struct nlb_afu_ctx *ctx = NULL;
> > +	struct rte_pmd_afu_nlb_cfg *cfg = NULL;
> > +	struct nlb_csr_ctl ctl;
> > +	uint32_t *ptr = NULL;
> > +	uint32_t i, j, cl, val = 0;
> > +	uint64_t sval = 0;
> > +	int ret = 0;
> > +
> > +	if (!dev)
> > +		return -EINVAL;
> > +
> > +	if (!dev->priv)
> > +		return -ENOENT;
> > +
> > +	priv = (struct n3000_afu_priv *)dev->priv;
> > +	ctx = &priv->nlb_ctx;
> > +	cfg = &priv->nlb_cfg;
> > +
> > +	/* initialize registers */
> > +	AFU_MF_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
> > +	rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
> > +
> > +	ctl.csr = 0;
> > +	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> > +	ctl.reset = 1;
> > +	rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> > +
> > +	AFU_MF_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
> > +	rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr +
> CSR_SRC_ADDR);
> > +	AFU_MF_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
> > +	rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr +
> CSR_DST_ADDR);
> > +
> > +	ret = nlb_afu_config(dev);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/* initialize src data */
> > +	ptr = (uint32_t *)ctx->src_ptr;
> > +	j = CLS_TO_SIZE(cfg->end) >> 2;
> > +	for (i = 0; i < j; i++)
> > +		*ptr++ = i;
> > +
> > +	/* start test */
> > +	for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
> > +		memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
> > +		memset(ctx->dsm_ptr, 0, DSM_SIZE);
> > +
> > +		ctl.csr = 0;
> > +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> > +		ctl.reset = 1;
> > +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> > +
> > +		rte_write32(cl, ctx->addr + CSR_NUM_LINES);
> > +
> > +		rte_delay_us(10);
> > +
> > +		ctl.start = 1;
> > +		rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> > +
> > +		if (cfg->cont) {
> > +			rte_delay_ms(cfg->timeout * 1000);
> > +			ctl.force_completion = 1;
> > +			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> > +			ret = dsm_poll_timeout(&ctx->status_ptr-
> > >test_complete,
> > +				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
> > +				DSM_TIMEOUT);
> > +			if (ret) {
> > +				printf("DSM poll timeout\n");
> > +				goto end;
> > +			}
> > +		} else {
> > +			ret = dsm_poll_timeout(&ctx->status_ptr-
> > >test_complete,
> > +				val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
> > +				DSM_TIMEOUT);
> > +			if (ret) {
> > +				printf("DSM poll timeout\n");
> > +				goto end;
> > +			}
> > +			ctl.force_completion = 1;
> > +			rte_write32(ctl.csr, ctx->addr + CSR_CTL);
> > +		}
> > +
> > +		nlb_afu_report(dev, cl);
> > +
> > +		i = 0;
> > +		while (i++ < 100) {
> > +			sval = rte_read64(ctx->addr + CSR_STATUS1);
> > +			if (sval == 0)
> > +				break;
> > +			rte_delay_us(1000);
> > +		}
> > +
> > +		ptr = (uint32_t *)ctx->dest_ptr;
> > +		j = CLS_TO_SIZE(cl) >> 2;
> > +		for (i = 0; i < j; i++) {
> > +			if (*ptr++ != i) {
> > +				AFU_MF_PMD_ERR("Data mismatch @ %u",
> i);
> > +				break;
> > +			}
> > +		}
> > +	}
> > +
> > +end:
> > +	return ret;
> > +}
> > +
> > +static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
> > +{
> > +	int i = 0;
> > +
> > +	if (!ctx)
> > +		return;
> > +
> > +	for (i = 0; i < NUM_DMA_BUF; i++) {
> > +		rte_free(ctx->dma_buf[i]);
> > +		ctx->dma_buf[i] = NULL;
> > +	}
> > +
> > +	rte_free(ctx->data_buf);
> > +	ctx->data_buf = NULL;
> > +
> > +	rte_free(ctx->ref_buf);
> > +	ctx->ref_buf = NULL;
> > +}
> > +
> > +static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
> > +	struct rte_pmd_afu_dma_cfg *cfg)
> > +{
> > +	size_t page_sz = sysconf(_SC_PAGE_SIZE);
> > +	int i, ret = 0;
> > +
> > +	if (!ctx || !cfg)
> > +		return -EINVAL;
> > +
> > +	for (i = 0; i < NUM_DMA_BUF; i++) {
> > +		ctx->dma_buf[i] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
> > +			TEST_MEM_ALIGN);
> > +		if (!ctx->dma_buf[i]) {
> > +			ret = -ENOMEM;
> > +			goto free;
> > +		}
> > +		ctx->dma_iova[i] = rte_malloc_virt2iova(ctx->dma_buf[i]);
> > +		if (ctx->dma_iova[i] == RTE_BAD_IOVA) {
> > +			ret = -ENOMEM;
> > +			goto free;
> > +		}
> > +	}
> > +
> > +	ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz);
> > +	if (!ctx->data_buf) {
> > +		ret = -ENOMEM;
> > +		goto free;
> > +	}
> > +
> > +	ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz);
> > +	if (!ctx->ref_buf) {
> > +		ret = -ENOMEM;
> > +		goto free;
> > +	}
> 
> Suppose the ctx->ref_buf allocation fails: will dma_afu_buf_free() still
> work correctly?
> 
dma_afu_buf_free() works correctly in the failure case, but this is not the standard error-handling pattern; I will change it.
> > +
> > +	return 0;
> > +
> > +free:
> > +	dma_afu_buf_free(ctx);
> > +	return ret;
> > +}
> > +
> > +static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
> > +{
> > +	int *ptr = NULL;
> > +	size_t i = 0;
> > +	size_t dword_size = 0;
> > +
> > +	if (!ctx || !size)
> > +		return;
> > +
> > +	ptr = (int *)ctx->ref_buf;
> > +
> > +	if (ctx->pattern) {
> > +		memset(ptr, ctx->pattern, size);
> > +	} else {
> > +		srand(99);
> > +		dword_size = size >> 2;
> > +		for (i = 0; i < dword_size; i++)
> > +			*ptr++ = rand();
> > +	}
> > +	rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
> > +}
> > +
> > +static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
> > +{
> > +	uint8_t *src = NULL;
> > +	uint8_t *dst = NULL;
> > +	size_t i = 0;
> > +	int n = 0;
> > +
> > +	if (!ctx || !size)
> > +		return -EINVAL;
> > +
> > +	src = (uint8_t *)ctx->ref_buf;
> > +	dst = (uint8_t *)ctx->data_buf;
> > +
> > +	if (memcmp(src, dst, size)) {
> > +		printf("Transfer is corrupted\n");
> > +		if (ctx->verbose) {
> > +			for (i = 0; i < size; i++) {
> > +				if (*src != *dst) {
> > +					if (++n >= ERR_CHECK_LIMIT)
> > +						break;
> > +					printf("Mismatch at 0x%zx, "
> > +						"Expected %02x  Actual
> > %02x\n",
> > +						i, *src, *dst);
> > +				}
> > +				src++;
> > +				dst++;
> > +			}
> > +			if (n < ERR_CHECK_LIMIT) {
> > +				printf("Found %d error bytes\n", n);
> > +			} else {
> > +				printf("......\n");
> > +				printf("Found more than %d error bytes\n",
> n);
> > +			}
> > +		}
> > +		return -1;
> > +	}
> > +
> > +	printf("Transfer is verified\n");
> > +	return 0;
> > +}
> > +
> > +static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr,
> uint64_t
> > bytes)
> > +{
> > +	uint64_t qwords = bytes / sizeof(uint64_t);
> > +
> > +	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
> > +		!IS_ALIGNED_QWORD((uint64_t)bytes))
> > +		return;
> > +
> > +	for (; qwords > 0; qwords--, host_addr++, dev_addr++)
> > +		rte_write64(*host_addr, dev_addr);
> > +}
> > +
> > +static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t
> > bytes)
> > +{
> > +	uint64_t qwords = bytes / sizeof(uint64_t);
> > +
> > +	if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
> > +		!IS_ALIGNED_QWORD((uint64_t)bytes))
> > +		return;
> > +
> > +	for (; qwords > 0; qwords--, host_addr++, dev_addr++)
> > +		*host_addr = rte_read64(dev_addr);
> > +}
> > +
> > +static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
> > +{
> > +	uint64_t requested_page = addr & ~DMA_ASE_WINDOW_MASK;
> > +
> > +	if (!ctx)
> > +		return;
> > +
> > +	if (requested_page != ctx->cur_ase_page) {
> > +		rte_write64(requested_page, ctx->ase_ctrl_addr);
> > +		ctx->cur_ase_page = requested_page;
> > +	}
> > +}
> > +
> > +static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t
> dev_addr,
> > +	uint64_t host_addr, uint32_t count)
> > +{
> > +	uint64_t dev_aligned_addr = 0;
> > +	uint64_t shift = 0;
> > +	uint64_t val = 0;
> > +	uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
> > +
> > +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)",
> > host_addr,
> > +		dev_addr, count);
> > +
> > +	if (!ctx || (count >= QWORD_BYTES))
> > +		return -EINVAL;
> > +
> > +	if (!count)
> > +		return 0;
> > +
> > +	switch_ase_page(ctx, dev_addr);
> > +
> > +	shift = dev_addr % QWORD_BYTES;
> > +	dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
> > +	val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
> > +	rte_memcpy(((char *)(&val)) + shift, (void *)addr, count);
> > +
> > +	/* write back to device */
> > +	rte_write64(val, ctx->ase_data_addr + dev_aligned_addr);
> > +
> > +	return 0;
> > +}
> > +
> > +static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
> > +	uint64_t *src_ptr, uint64_t *count)
> > +{
> > +	uint64_t src = *src_ptr;
> > +	uint64_t dst = *dst_ptr;
> > +	uint64_t align_bytes = *count;
> > +	uint64_t offset = 0;
> > +	uint64_t left_in_page = DMA_ASE_WINDOW;
> > +	uint64_t size_to_copy = 0;
> > +
> > +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64"
> (0x%"PRIx64")",
> > src, dst,
> > +		align_bytes);
> > +
> > +	if (!ctx || !IS_ALIGNED_DWORD(dst))
> > +		return -EINVAL;
> > +
> > +	if (align_bytes < DWORD_BYTES)
> > +		return 0;
> > +
> > +	if (!IS_ALIGNED_QWORD(dst)) {
> > +		/* Write out a single DWORD to get QWORD aligned */
> > +		switch_ase_page(ctx, dst);
> > +		offset = dst & DMA_ASE_WINDOW_MASK;
> > +
> > +		rte_write32(*(uint32_t *)(uintptr_t)src,
> > +			ctx->ase_data_addr + offset);
> > +		src += DWORD_BYTES;
> > +		dst += DWORD_BYTES;
> > +		align_bytes -= DWORD_BYTES;
> > +	}
> > +
> > +	if (!align_bytes)
> > +		return 0;
> > +
> > +	/* Write out blocks of 64-bit values */
> > +	while (align_bytes >= QWORD_BYTES) {
> > +		left_in_page -= dst & DMA_ASE_WINDOW_MASK;
> > +		size_to_copy =
> > +			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES -
> > 1)));
> > +		if (size_to_copy < QWORD_BYTES)
> > +			break;
> > +		switch_ase_page(ctx, dst);
> > +		offset = dst & DMA_ASE_WINDOW_MASK;
> > +		blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
> > +			(uint64_t *)(uintptr_t)src, size_to_copy);
> > +		src += size_to_copy;
> > +		dst += size_to_copy;
> > +		align_bytes -= size_to_copy;
> > +	}
> > +
> > +	if (align_bytes >= DWORD_BYTES) {
> > +		/* Write out remaining DWORD */
> > +		switch_ase_page(ctx, dst);
> > +		offset = dst & DMA_ASE_WINDOW_MASK;
> > +		rte_write32(*(uint32_t *)(uintptr_t)src,
> > +			ctx->ase_data_addr + offset);
> > +		src += DWORD_BYTES;
> > +		dst += DWORD_BYTES;
> > +		align_bytes -= DWORD_BYTES;
> > +	}
> > +
> > +	*src_ptr = src;
> > +	*dst_ptr = dst;
> > +	*count = align_bytes;
> > +
> > +	return 0;
> > +}
> > +
> > +static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
> > +	uint64_t *src_ptr, uint64_t count)
> > +{
> > +	uint64_t dst = *dst_ptr;
> > +	uint64_t src = *src_ptr;
> > +	uint64_t count_left = count;
> > +	uint64_t unaligned_size = 0;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64"
> (0x%"PRIx64")",
> > src, dst,
> > +		count);
> > +
> > +	/* aligns address to 8 byte using dst masking method */
> > +	if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) {
> > +		unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
> > +		if (unaligned_size > count_left)
> > +			unaligned_size = count_left;
> > +		ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
> > +		if (ret)
> > +			return ret;
> > +		count_left -= unaligned_size;
> > +		src += unaligned_size;
> > +		dst += unaligned_size;
> > +	}
> > +
> > +	/* Handles 8/4 byte MMIO transfer */
> > +	ret = ase_write(ctx, &dst, &src, &count_left);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/* Left over unaligned bytes transferred using dst masking method
> */
> > +	unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
> > +	if (unaligned_size > count_left)
> > +		unaligned_size = count_left;
> > +
> > +	ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
> > +	if (ret)
> > +		return ret;
> > +
> > +	count_left -= unaligned_size;
> > +	*dst_ptr = dst + unaligned_size;
> > +	*src_ptr = src + unaligned_size;
> > +
> > +	return 0;
> > +}
> > +
> > +static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t
> dev_addr,
> > +	uint64_t host_addr, uint32_t count)
> > +{
> > +	uint64_t dev_aligned_addr = 0;
> > +	uint64_t shift = 0;
> > +	uint64_t val = 0;
> > +	uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
> > +
> > +	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)",
> > host_addr,
> > +		dev_addr, count);
> > +
> > +	if (!ctx || (count >= QWORD_BYTES))
> > +		return -EINVAL;
> > +
> > +	if (!count)
> > +		return 0;
> > +
> > +	switch_ase_page(ctx, dev_addr);
> > +
> > +	shift = dev_addr % QWORD_BYTES;
> > +	dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
> > +	val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
> > +	rte_memcpy((void *)addr, ((char *)(&val)) + shift, count);
> > +
> > +	return 0;
> > +}
> > +
> > +static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
> > +	uint64_t *dst_ptr, uint64_t *count)
> > +{
> > +	uint64_t src = *src_ptr;
> > +	uint64_t dst = *dst_ptr;
> > +	uint64_t align_bytes = *count;
> > +	uint64_t offset = 0;
> > +	uint64_t left_in_page = DMA_ASE_WINDOW;
> > +	uint64_t size_to_copy = 0;
> > +
> > +	AFU_MF_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64"
> (0x%"PRIx64")",
> > dst, src,
> > +		align_bytes);
> > +
> > +	if (!ctx || !IS_ALIGNED_DWORD(src))
> > +		return -EINVAL;
> > +
> > +	if (align_bytes < DWORD_BYTES)
> > +		return 0;
> > +
> > +	if (!IS_ALIGNED_QWORD(src)) {
> > +		/* Read a single DWORD to get QWORD aligned */
> > +		switch_ase_page(ctx, src);
> > +		offset = src & DMA_ASE_WINDOW_MASK;
> > +		*(uint32_t *)(uintptr_t)dst =
> > +			rte_read32(ctx->ase_data_addr + offset);
> > +		src += DWORD_BYTES;
> > +		dst += DWORD_BYTES;
> > +		align_bytes -= DWORD_BYTES;
> > +	}
> > +
> > +	if (!align_bytes)
> > +		return 0;
> > +
> > +	/* Read blocks of 64-bit values */
> > +	while (align_bytes >= QWORD_BYTES) {
> > +		left_in_page -= src & DMA_ASE_WINDOW_MASK;
> > +		size_to_copy =
> > +			MIN(left_in_page, (align_bytes & ~(QWORD_BYTES -
> > 1)));
> > +		if (size_to_copy < QWORD_BYTES)
> > +			break;
> > +		switch_ase_page(ctx, src);
> > +		offset = src & DMA_ASE_WINDOW_MASK;
> > +		blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
> > +			(uint64_t *)(uintptr_t)dst, size_to_copy);
> > +		src += size_to_copy;
> > +		dst += size_to_copy;
> > +		align_bytes -= size_to_copy;
> > +	}
> > +
> > +	if (align_bytes >= DWORD_BYTES) {
> > +		/* Read remaining DWORD */
> > +		switch_ase_page(ctx, src);
> > +		offset = src & DMA_ASE_WINDOW_MASK;
> > +		*(uint32_t *)(uintptr_t)dst =
> > +			rte_read32(ctx->ase_data_addr + offset);
> > +		src += DWORD_BYTES;
> > +		dst += DWORD_BYTES;
> > +		align_bytes -= DWORD_BYTES;
> > +	}
> > +
> > +	*src_ptr = src;
> > +	*dst_ptr = dst;
> > +	*count = align_bytes;
> > +
> > +	return 0;
> > +}
> > +
> > +static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
> > +	uint64_t *dst_ptr, uint64_t count)
> > +{
> > +	uint64_t src = *src_ptr;
> > +	uint64_t dst = *dst_ptr;
> > +	uint64_t count_left = count;
> > +	uint64_t unaligned_size = 0;
> > +	int ret = 0;
> > +
> > +	AFU_MF_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64"
> (0x%"PRIx64")",
> > src, dst,
> > +		count);
> > +
> > +	/* Aligns address to 8 byte using src masking method */
> > +	if (!IS_ALIGNED_DWORD(src) && !IS_ALIGNED_QWORD(src)) {
> > +		unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
> > +		if (unaligned_size > count_left)
> > +			unaligned_size = count_left;
> > +		ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
> > +		if (ret)
> > +			return ret;
> > +		count_left -= unaligned_size;
> > +		dst += unaligned_size;
> > +		src += unaligned_size;
> > +	}
> > +
> > +	/* Handles 8/4 byte MMIO transfer */
> > +	ret = ase_read(ctx, &src, &dst, &count_left);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/* Left over unaligned bytes transferred using src masking method */
> > +	unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
> > +	if (unaligned_size > count_left)
> > +		unaligned_size = count_left;
> > +
> > +	ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
> > +	if (ret)
> > +		return ret;
> > +
> > +	count_left -= unaligned_size;
> > +	*dst_ptr = dst + unaligned_size;
> > +	*src_ptr = src + unaligned_size;
> > +
> > +	return 0;
> > +}
> > +
> > +static void clear_interrupt(struct dma_afu_ctx *ctx)
> > +{
> > +	/* clear interrupt by writing 1 to IRQ bit in status register */
> > +	msgdma_status status;
> > +
> > +	if (!ctx)
> > +		return;
> > +
> > +	status.csr = 0;
> > +	status.irq = 1;
> > +	rte_write32(status.csr, CSR_STATUS(ctx->csr_addr));
> > +}
> > +
> > +static int poll_interrupt(struct dma_afu_ctx *ctx)
> > +{
> > +	struct pollfd pfd = {0};
> > +	uint64_t count = 0;
> > +	ssize_t bytes_read = 0;
> > +	int poll_ret = 0;
> > +	int ret = 0;
> > +
> > +	if (!ctx || (ctx->event_fd < 0))
> > +		return -EINVAL;
> > +
> > +	pfd.fd = ctx->event_fd;
> > +	pfd.events = POLLIN;
> > +	poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
> > +	if (poll_ret < 0) {
> > +		AFU_MF_PMD_ERR("Error %s", strerror(errno));
> > +		ret = -EFAULT;
> > +		goto out;
> > +	} else if (poll_ret == 0) {
> > +		AFU_MF_PMD_ERR("Timeout");
> > +		ret = -ETIMEDOUT;
> > +	} else {
> > +		bytes_read = read(pfd.fd, &count, sizeof(count));
> > +		if (bytes_read > 0) {
> > +			if (ctx->verbose)
> > +				AFU_MF_PMD_DEBUG("Successful, ret %d,
> cnt
> > %"PRIu64,
> > +					poll_ret, count);
> > +			ret = 0;
> > +		} else {
> > +			AFU_MF_PMD_ERR("Failed %s", bytes_read > 0 ?
> > +				strerror(errno) : "zero bytes read");
> > +			ret = -EIO;
> > +		}
> > +	}
> > +out:
> > +	clear_interrupt(ctx);
> > +	return ret;
> > +}
> > +
> > +static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc
> *desc)
> > +{
> > +	msgdma_status status;
> > +	uint64_t fpga_queue_full = 0;
> > +
> > +	if (!ctx)
> > +		return;
> > +
> > +	if (ctx->verbose) {
> > +		AFU_MF_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
> > +			desc->rd_address_ext, desc->rd_address);
> > +		AFU_MF_PMD_DEBUG("descriptor.wr_address =
> 0x%x%08x",
> > +			desc->wr_address_ext, desc->wr_address);
> > +		AFU_MF_PMD_DEBUG("descriptor.len = %u", desc->len);
> > +		AFU_MF_PMD_DEBUG("descriptor.wr_burst_count = %u",
> > +			desc->wr_burst_count);
> > +		AFU_MF_PMD_DEBUG("descriptor.rd_burst_count = %u",
> > +			desc->rd_burst_count);
> > +		AFU_MF_PMD_DEBUG("descriptor.wr_stride %u", desc-
> > >wr_stride);
> > +		AFU_MF_PMD_DEBUG("descriptor.rd_stride %u", desc-
> > >rd_stride);
> > +	}
> > +
> > +	do {
> > +		status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
> > +		if (fpga_queue_full++ > 100000000) {
> > +			AFU_MF_PMD_DEBUG("DMA queue full retry");
> > +			fpga_queue_full = 0;
> > +		}
> > +	} while (status.desc_buf_full);
> > +
> > +	blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
> > +		sizeof(*desc));
> > +}
> > +
> > +static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
> > +	int count, int is_last_desc, fpga_dma_type type, int intr_en)
> > +{
> > +	msgdma_ext_desc *desc = NULL;
> > +	int alignment_offset = 0;
> > +	int segment_size = 0;
> > +
> > +	if (!ctx)
> > +		return -EINVAL;
> > +
> > +	/* src, dst and count must be 64-byte aligned */
> > +	if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
> > +		!IS_DMA_ALIGNED(count))
> > +		return -EINVAL;
> > +	memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
> > +
> > +	/* these fields are fixed for all DMA transfers */
> > +	desc = ctx->desc_buf;
> > +	desc->seq_num = 0;
> > +	desc->wr_stride = 1;
> > +	desc->rd_stride = 1;
> > +	desc->control.go = 1;
> > +	if (intr_en)
> > +		desc->control.transfer_irq_en = 1;
> > +	else
> > +		desc->control.transfer_irq_en = 0;
> > +
> > +	if (!is_last_desc)
> > +		desc->control.early_done_en = 1;
> > +	else
> > +		desc->control.early_done_en = 0;
> > +
> > +	if (type == FPGA_TO_FPGA) {
> > +		desc->rd_address = src & DMA_MASK_32_BIT;
> > +		desc->wr_address = dst & DMA_MASK_32_BIT;
> > +		desc->len = count;
> > +		desc->wr_burst_count = 4;
> > +		desc->rd_burst_count = 4;
> > +		desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
> > +		desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
> > +		send_descriptor(ctx, desc);
> > +	} else {
> > +		/* check CCIP (host) address is aligned to 4CL (256B) */
> > +		alignment_offset = (type ==