* [PATCH 1/4] dma/acc: add probe and remove
From: Chengwen Feng @ 2025-08-27 9:27 UTC
To: thomas, liuyonglong; +Cc: dev
This patch adds probe and remove operations for the accelerator DMA driver.
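For example (assuming a UACCE device named hisi_zip-0, as described in the
driver documentation added in patch 4/4), probing with the queues devarg
creates one dmadev per queue:
  dpdk-app -a uacce:hisi_zip-0,queues=2
which creates the dmadevs "hisi_zip-0-dma0" and "hisi_zip-0-dma1".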
Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
MAINTAINERS | 4 +
drivers/dma/acc/acc_dmadev.c | 281 +++++++++++++++++++++++++++++++++++
drivers/dma/acc/acc_dmadev.h | 53 +++++++
drivers/dma/acc/meson.build | 21 +++
drivers/dma/meson.build | 1 +
5 files changed, 360 insertions(+)
create mode 100644 drivers/dma/acc/acc_dmadev.c
create mode 100644 drivers/dma/acc/acc_dmadev.h
create mode 100644 drivers/dma/acc/meson.build
diff --git a/MAINTAINERS b/MAINTAINERS
index 7aca98c537..42717363a0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1363,6 +1363,10 @@ M: Chengwen Feng <fengchengwen@huawei.com>
F: drivers/dma/hisilicon/
F: doc/guides/dmadevs/hisilicon.rst
+HiSilicon Accelerator DMA
+M: Chengwen Feng <fengchengwen@huawei.com>
+F: drivers/dma/acc/
+
Marvell CNXK DPI DMA
M: Vamsi Attunuru <vattunuru@marvell.com>
T: git://dpdk.org/next/dpdk-next-net-mrvl
diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
new file mode 100644
index 0000000000..b479d52c91
--- /dev/null
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <rte_byteorder.h>
+#include <rte_eal.h>
+#include <rte_io.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+
+#include <rte_dmadev_pmd.h>
+
+#include "acc_dmadev.h"
+
+RTE_LOG_REGISTER_DEFAULT(acc_dma_logtype, INFO);
+#define RTE_LOGTYPE_ACC_DMA acc_dma_logtype
+#define ACC_DMA_LOG(level, ...) \
+ RTE_LOG_LINE_PREFIX(level, ACC_DMA, "%s(): ", __func__, __VA_ARGS__)
+#define ACC_DMA_DEV_LOG(hw, level, ...) \
+ RTE_LOG_LINE_PREFIX(level, ACC_DMA, "%s %s(): ", \
+ (hw)->data->dev_name RTE_LOG_COMMA __func__, __VA_ARGS__)
+#define ACC_DMA_DEBUG(hw, ...) \
+ ACC_DMA_DEV_LOG(hw, DEBUG, __VA_ARGS__)
+#define ACC_DMA_INFO(hw, ...) \
+ ACC_DMA_DEV_LOG(hw, INFO, __VA_ARGS__)
+#define ACC_DMA_WARN(hw, ...) \
+ ACC_DMA_DEV_LOG(hw, WARNING, __VA_ARGS__)
+#define ACC_DMA_ERR(hw, ...) \
+ ACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
+
+static void
+acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
+ uint16_t queue_id, char *dev_name, size_t size)
+{
+ memset(dev_name, 0, size);
+ (void)snprintf(dev_name, size, "%s-dma%u", uacce_dev->device.name, queue_id);
+}
+
+static int
+acc_dma_get_qp_info(struct acc_dma_dev *hw)
+{
+#define CMD_QM_GET_QP_CTX _IOWR('H', 10, struct acc_dma_qp_context)
+#define CMD_QM_GET_QP_INFO _IOWR('H', 11, struct acc_dma_qp_info)
+#define QP_ALG_TYPE 2
+ struct acc_dma_qp_context {
+ uint16_t id;
+ uint16_t qc_type;
+ } qp_ctx;
+ struct acc_dma_qp_info {
+ uint32_t sqe_size;
+ uint16_t sq_depth;
+ uint16_t cq_depth;
+ uint64_t reserved;
+ } qp_info;
+ int ret;
+
+ memset(&qp_ctx, 0, sizeof(qp_ctx));
+ qp_ctx.qc_type = QP_ALG_TYPE;
+ ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_CTX, &qp_ctx);
+ if (ret != 0) {
+ ACC_DMA_ERR(hw, "get qm qp context fail!");
+ return -EINVAL;
+ }
+ hw->sqn = qp_ctx.id;
+
+ memset(&qp_info, 0, sizeof(qp_info));
+ ret = rte_uacce_queue_ioctl(&hw->qctx, CMD_QM_GET_QP_INFO, &qp_info);
+ if (ret != 0) {
+ ACC_DMA_ERR(hw, "get qm qp info fail!");
+ return -EINVAL;
+ }
+ if ((qp_info.sq_depth & (qp_info.sq_depth - 1)) != 0) {
+ ACC_DMA_ERR(hw, "sq depth is not 2's power!");
+ return -EINVAL;
+ }
+ hw->sqe_size = qp_info.sqe_size;
+ hw->sq_depth = qp_info.sq_depth;
+ hw->cq_depth = qp_info.cq_depth;
+ hw->sq_depth_mask = hw->sq_depth - 1;
+
+ return 0;
+}
+
+static int
+acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
+{
+ char name[RTE_DEV_NAME_MAX_LEN];
+ struct rte_dma_dev *dev;
+ struct acc_dma_dev *hw;
+ int ret;
+
+ acc_dma_gen_dev_name(uacce_dev, queue_id, name, sizeof(name));
+ dev = rte_dma_pmd_allocate(name, uacce_dev->device.numa_node,
+ sizeof(struct acc_dma_dev));
+ if (dev == NULL) {
+ ACC_DMA_LOG(ERR, "%s allocate dmadev fail!", name);
+ return -EINVAL;
+ }
+
+ dev->device = &uacce_dev->device;
+ dev->fp_obj->dev_private = dev->data->dev_private;
+
+ hw = dev->data->dev_private;
+ hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR are usable. */
+
+ ret = rte_uacce_queue_alloc(uacce_dev, &hw->qctx);
+ if (ret != 0) {
+ ACC_DMA_ERR(hw, "alloc queue fail!");
+ goto release_dma_pmd;
+ }
+
+ ret = acc_dma_get_qp_info(hw);
+ if (ret != 0)
+ goto free_uacce_queue;
+
+ hw->io_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+ if (hw->io_base == NULL) {
+ ACC_DMA_ERR(hw, "mmap MMIO region fail!");
+ ret = -EINVAL;
+ goto free_uacce_queue;
+ }
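+ /* the doorbell register is at a fixed 0x1000 offset within the MMIO region. */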
+ hw->doorbell_reg = (void *)((uintptr_t)hw->io_base + 0x1000);
+
+ hw->dus_base = rte_uacce_queue_mmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+ if (hw->dus_base == NULL) {
+ ACC_DMA_ERR(hw, "mmap DUS region fail!");
+ ret = -EINVAL;
+ goto unmap_mmio;
+ }
+ hw->sqe = hw->dus_base;
+ hw->cqe = (void *)((uintptr_t)hw->dus_base + hw->sqe_size * hw->sq_depth);
+ hw->sq_status = (uint32_t *)((uintptr_t)hw->dus_base +
+ uacce_dev->qfrt_sz[RTE_UACCE_QFRT_DUS] - sizeof(uint32_t));
+ hw->cq_status = hw->sq_status - 1;
+
+ hw->status = rte_zmalloc_socket(NULL, sizeof(uint16_t) * hw->sq_depth,
+ RTE_CACHE_LINE_SIZE, uacce_dev->device.numa_node);
+ if (hw->status == NULL) {
+ ACC_DMA_ERR(hw, "malloc status region fail!");
+ ret = -ENOMEM;
+ goto unmap_dus;
+ }
+
+ dev->state = RTE_DMA_DEV_READY;
+ ACC_DMA_DEBUG(hw, "create dmadev %s success!", name);
+
+ return 0;
+
+unmap_dus:
+ rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+unmap_mmio:
+ rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+free_uacce_queue:
+ rte_uacce_queue_free(&hw->qctx);
+release_dma_pmd:
+ rte_dma_pmd_release(name);
+ return ret;
+}
+
+static int
+acc_dma_parse_queues(const char *key, const char *value, void *extra_args)
+{
+ struct acc_dma_config *config = extra_args;
+ uint64_t val;
+ char *end;
+
+ RTE_SET_USED(key);
+
+ errno = 0;
+ val = strtoull(value, &end, 0);
+ if (errno == ERANGE || value == end || *end != '\0' || val == 0) {
+ ACC_DMA_LOG(ERR, "%s invalid queues! set to default one queue!",
+ config->dev->name);
+ config->queues = ACC_DMA_DEFAULT_QUEUES;
+ } else if (val > config->avail_queues) {
+ ACC_DMA_LOG(WARNING, "%s exceed available queues! set to available queues",
+ config->dev->name);
+ config->queues = config->avail_queues;
+ } else {
+ config->queues = val;
+ }
+
+ return 0;
+}
+
+static int
+acc_dma_parse_devargs(struct rte_uacce_device *uacce_dev, struct acc_dma_config *config)
+{
+ struct rte_kvargs *kvlist;
+ int avail_queues;
+
+ avail_queues = rte_uacce_avail_queues(uacce_dev);
+ if (avail_queues <= 0) {
+ ACC_DMA_LOG(ERR, "%s don't have available queues!", uacce_dev->name);
+ return -1;
+ }
+ config->dev = uacce_dev;
+ config->avail_queues = avail_queues <= UINT16_MAX ? avail_queues : UINT16_MAX;
+
+ if (uacce_dev->device.devargs == NULL)
+ return 0;
+
+ kvlist = rte_kvargs_parse(uacce_dev->device.devargs->args, NULL);
+ if (kvlist == NULL)
+ return 0;
+
+ (void)rte_kvargs_process(kvlist, ACC_DMA_DEVARG_QUEUES, &acc_dma_parse_queues, config);
+
+ rte_kvargs_free(kvlist);
+
+ return 0;
+}
+
+static int
+acc_dma_probe(struct rte_uacce_driver *dr, struct rte_uacce_device *uacce_dev)
+{
+ struct acc_dma_config config = { .queues = ACC_DMA_DEFAULT_QUEUES };
+ int ret = 0;
+ uint32_t i;
+
+ RTE_SET_USED(dr);
+
+ ret = acc_dma_parse_devargs(uacce_dev, &config);
+ if (ret != 0)
+ return ret;
+
+ for (i = 0; i < config.queues; i++) {
+ ret = acc_dma_create(uacce_dev, i);
+ if (ret != 0) {
+ ACC_DMA_LOG(ERR, "%s create dmadev No.%u failed!", uacce_dev->name, i);
+ break;
+ }
+ }
+
+ if (ret != 0 && i > 0) {
+ ACC_DMA_LOG(WARNING, "%s probed %u dmadev, can't probe more!", uacce_dev->name, i);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int
+acc_dma_remove(struct rte_uacce_device *uacce_dev)
+{
+ struct rte_dma_info info;
+ int i = 0;
+ int ret;
+
+ RTE_DMA_FOREACH_DEV(i) {
+ ret = rte_dma_info_get(i, &info);
+ if (ret != 0)
+ continue;
+ if (strncmp(info.dev_name, uacce_dev->device.name,
+ strlen(uacce_dev->device.name)) == 0)
+ rte_dma_pmd_release(info.dev_name);
+ }
+
+ return 0;
+}
+
+static const struct rte_uacce_id acc_dma_id_table[] = {
+ { "hisi_qm_v5", "udma" },
+ { .dev_api = NULL, },
+};
+
+static struct rte_uacce_driver acc_dma_pmd_drv = {
+ .id_table = acc_dma_id_table,
+ .probe = acc_dma_probe,
+ .remove = acc_dma_remove,
+};
+
+RTE_PMD_REGISTER_UACCE(dma_acc, acc_dma_pmd_drv);
+RTE_PMD_REGISTER_PARAM_STRING(dma_acc,
+ ACC_DMA_DEVARG_QUEUES "=<uint16> ");
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
new file mode 100644
index 0000000000..ce613541c0
--- /dev/null
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef ACC_DMADEV_H
+#define ACC_DMADEV_H
+
+#include <bus_uacce_driver.h>
+#include <rte_bitops.h>
+#include <rte_common.h>
+#include <rte_dmadev_pmd.h>
+
+#define ACC_DMA_DEVARG_QUEUES "queues"
+#define ACC_DMA_DEFAULT_QUEUES 1
+
+struct acc_dma_config {
+ uint16_t queues;
+
+ /* The following fields carry context for devargs parsing. */
+ struct rte_uacce_device *dev;
+ uint16_t avail_queues;
+};
+
+struct acc_dma_sqe {};
+struct acc_dma_cqe {};
+
+struct acc_dma_dev {
+ struct acc_dma_sqe *sqe;
+ struct acc_dma_cqe *cqe;
+ uint16_t *status; /* the completion status array of SQEs. */
+
+ volatile void *doorbell_reg; /**< register address for doorbell. */
+ volatile uint32_t *sq_status; /**< SQ status pointer. */
+ volatile uint32_t *cq_status; /**< CQ status pointer. */
+
+ uint16_t sqn; /**< SQ global number, inited when created. */
+ uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is a power of 2. */
+
+ uint16_t cq_depth; /**< CQ depth, inited when created. */
+
+ /**
+ * The following fields are not accessed in the I/O path, so they are
+ * placed at the end.
+ */
+ struct rte_dma_dev_data *data;
+ struct rte_uacce_qcontex qctx;
+ void *io_base;
+ void *dus_base;
+ uint32_t sqe_size;
+ uint16_t sq_depth;
+};
+
+#endif /* ACC_DMADEV_H */
diff --git a/drivers/dma/acc/meson.build b/drivers/dma/acc/meson.build
new file mode 100644
index 0000000000..8a1bad5281
--- /dev/null
+++ b/drivers/dma/acc/meson.build
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+if not is_linux
+ build = false
+ reason = 'only supported on Linux'
+ subdir_done()
+endif
+
+if (arch_subdir != 'x86' and arch_subdir != 'arm') or (not dpdk_conf.get('RTE_ARCH_64'))
+ build = false
+ reason = 'only supported on x86_64 and aarch64'
+ subdir_done()
+endif
+
+deps += ['bus_uacce', 'dmadev']
+sources = files(
+ 'acc_dmadev.c',
+)
+
+require_iova_in_mbuf = false
diff --git a/drivers/dma/meson.build b/drivers/dma/meson.build
index 358132759a..eeab0ec361 100644
--- a/drivers/dma/meson.build
+++ b/drivers/dma/meson.build
@@ -2,6 +2,7 @@
# Copyright 2021 HiSilicon Limited
drivers = [
+ 'acc',
'cnxk',
'dpaa',
'dpaa2',
--
2.17.1
* [PATCH 2/4] dma/acc: add control path ops
From: Chengwen Feng @ 2025-08-27 9:27 UTC
To: thomas, liuyonglong; +Cc: dev
This commit adds control path ops for the accelerator DMA driver.
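From the application's view, these ops map onto the standard dmadev API.
A minimal sketch (error handling omitted; dev_id is assumed to come from
rte_dma_get_dev_id_by_name()):
  struct rte_dma_conf conf = { .nb_vchans = 1 };
  struct rte_dma_vchan_conf vconf = { .direction = RTE_DMA_DIR_MEM_TO_MEM };
  struct rte_dma_info info;
  rte_dma_info_get(dev_id, &info);
  vconf.nb_desc = info.max_desc; /* the ring size is fixed by the UACCE queue */
  rte_dma_configure(dev_id, &conf);
  rte_dma_vchan_setup(dev_id, 0, &vconf);
  rte_dma_start(dev_id);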
Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
drivers/dma/acc/acc_dmadev.c | 156 +++++++++++++++++++++++++++++++++++
drivers/dma/acc/acc_dmadev.h | 42 ++++++++++
2 files changed, 198 insertions(+)
diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
index b479d52c91..ce2f45cedb 100644
--- a/drivers/dma/acc/acc_dmadev.c
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -34,6 +34,161 @@ RTE_LOG_REGISTER_DEFAULT(acc_dma_logtype, INFO);
#define ACC_DMA_ERR(hw, ...) \
ACC_DMA_DEV_LOG(hw, ERR, __VA_ARGS__)
+static int
+acc_dma_info_get(const struct rte_dma_dev *dev,
+ struct rte_dma_info *dev_info,
+ uint32_t info_sz)
+{
+ struct acc_dma_dev *hw = dev->data->dev_private;
+
+ RTE_SET_USED(info_sz);
+
+ dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
+ RTE_DMA_CAPA_SVA |
+ RTE_DMA_CAPA_OPS_COPY |
+ RTE_DMA_CAPA_OPS_FILL;
+ dev_info->max_vchans = 1;
+ dev_info->max_desc = hw->sq_depth;
+ dev_info->min_desc = hw->sq_depth;
+
+ return 0;
+}
+
+static int
+acc_dma_configure(struct rte_dma_dev *dev,
+ const struct rte_dma_conf *conf,
+ uint32_t conf_sz)
+{
+ RTE_SET_USED(dev);
+ RTE_SET_USED(conf);
+ RTE_SET_USED(conf_sz);
+ return 0;
+}
+
+static int
+acc_dma_start(struct rte_dma_dev *dev)
+{
+ struct acc_dma_dev *hw = dev->data->dev_private;
+ int ret;
+
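+ /* restart case: the hardware queue keeps running, only reset the public ring indexes. */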
+ if (hw->started) {
+ hw->ridx = 0;
+ hw->cridx = 0;
+ return 0;
+ }
+
+ memset(hw->sqe, 0, hw->sqe_size * hw->sq_depth);
+ memset(hw->cqe, 0, sizeof(struct acc_dma_cqe) * hw->cq_depth);
+ memset(hw->status, 0, sizeof(uint16_t) * hw->sq_depth);
+ hw->ridx = 0;
+ hw->cridx = 0;
+ hw->sq_head = 0;
+ hw->sq_tail = 0;
+ hw->cq_sq_head = 0;
+ hw->avail_sqes = hw->sq_depth - ACC_DMA_SQ_GAP_NUM - 1;
+ hw->cq_head = 0;
+ hw->cqs_completed = 0;
+ hw->cqe_vld = 1;
+ hw->submitted = 0;
+ hw->completed = 0;
+ hw->errors = 0;
+ hw->invalid_lens = 0;
+ hw->io_errors = 0;
+ hw->qfulls = 0;
+
+ ret = rte_uacce_queue_start(&hw->qctx);
+ if (ret == 0)
+ hw->started = true;
+
+ return ret;
+}
+
+static int
+acc_dma_stop(struct rte_dma_dev *dev)
+{
+ RTE_SET_USED(dev);
+ return 0;
+}
+
+static int
+acc_dma_close(struct rte_dma_dev *dev)
+{
+ struct acc_dma_dev *hw = dev->data->dev_private;
+ /* The dmadev has already been stopped. */
+ rte_free(hw->status);
+ rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_DUS);
+ rte_uacce_queue_unmap(&hw->qctx, RTE_UACCE_QFRT_MMIO);
+ rte_uacce_queue_free(&hw->qctx);
+ return 0;
+}
+
+static int
+acc_dma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
+ const struct rte_dma_vchan_conf *conf,
+ uint32_t conf_sz)
+{
+ RTE_SET_USED(dev);
+ RTE_SET_USED(vchan);
+ RTE_SET_USED(conf);
+ RTE_SET_USED(conf_sz);
+ return 0;
+}
+
+static int
+acc_dma_stats_get(const struct rte_dma_dev *dev, uint16_t vchan,
+ struct rte_dma_stats *stats,
+ uint32_t stats_sz)
+{
+ struct acc_dma_dev *hw = dev->data->dev_private;
+
+ RTE_SET_USED(vchan);
+ RTE_SET_USED(stats_sz);
+ stats->submitted = hw->submitted;
+ stats->completed = hw->completed;
+ stats->errors = hw->errors;
+
+ return 0;
+}
+
+static int
+acc_dma_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)
+{
+ struct acc_dma_dev *hw = dev->data->dev_private;
+
+ RTE_SET_USED(vchan);
+ hw->submitted = 0;
+ hw->completed = 0;
+ hw->errors = 0;
+ hw->invalid_lens = 0;
+ hw->io_errors = 0;
+ hw->qfulls = 0;
+
+ return 0;
+}
+
+static int
+acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
+{
+ struct acc_dma_dev *hw = dev->data->dev_private;
+
+ fprintf(f, " sqn: %u sq_status: %s cq_status: %s\n"
+ " sqe_size: %u sq_depth: %u sq_depth_mask: %u cq_depth: %u\n",
+ hw->sqn, (*hw->sq_status != 0) ? "ERR" : "OK",
+ (*hw->cq_status != 0) ? "ERR" : "OK",
+ hw->sqe_size, hw->sq_depth, hw->sq_depth_mask, hw->cq_depth);
+ fprintf(f, " ridx: %u cridx: %u\n"
+ " sq_head: %u sq_tail: %u cq_sq_head: %u avail_sqes: %u\n"
+ " cq_head: %u cqs_completed: %u cqe_vld: %u\n",
+ hw->ridx, hw->cridx,
+ hw->sq_head, hw->sq_tail, hw->cq_sq_head, hw->avail_sqes,
+ hw->cq_head, hw->cqs_completed, hw->cqe_vld);
+ fprintf(f, " submitted: %" PRIu64 " completed: %" PRIu64 " errors: %" PRIu64
+ " invalid_lens: %" PRIu64 " io_errors: %" PRIu64 " qfulls: %" PRIu64 "\n",
+ hw->submitted, hw->completed, hw->errors, hw->invalid_lens,
+ hw->io_errors, hw->qfulls);
+
+ return 0;
+}
+
static void
acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
uint16_t queue_id, char *dev_name, size_t size)
@@ -104,6 +259,7 @@ acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
}
dev->device = &uacce_dev->device;
+ dev->dev_ops = &acc_dmadev_ops;
dev->fp_obj->dev_private = dev->data->dev_private;
hw = dev->data->dev_private;
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
index ce613541c0..b87626c244 100644
--- a/drivers/dma/acc/acc_dmadev.h
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -13,6 +13,9 @@
#define ACC_DMA_DEVARG_QUEUES "queues"
#define ACC_DMA_DEFAULT_QUEUES 1
+#define ACC_DMA_CQ_DOORBELL_PACE 64
+#define ACC_DMA_SQ_GAP_NUM ACC_DMA_CQ_DOORBELL_PACE
+
struct acc_dma_config {
uint16_t queues;
@@ -36,7 +39,45 @@ struct acc_dma_dev {
uint16_t sqn; /**< SQ global number, inited when created. */
uint16_t sq_depth_mask; /**< SQ depth - 1, the SQ depth is a power of 2. */
+ uint16_t ridx; /**< ring index which will be assigned to the next request. */
+ uint16_t cridx; /**< ring index which is returned by the completed APIs. */
+
+ /**
+ * SQE array management fields:
+ *
+ * -----------------------------------------------------
+ * | SQE0 | SQE1 | SQE2 | ... | SQEx | ... | SQEn-1 |
+ * -----------------------------------------------------
+ * ^ ^ ^
+ * | | |
+ * sq_head cq_sq_head sq_tail
+ *
+ * sq_head: index of the oldest completed request not yet reaped; this
+ * field is updated by the completed* APIs.
+ * sq_tail: index for the next new request; this field is updated by the
+ * copy/fill APIs.
+ * cq_sq_head: the index after the newest SQE completed by hardware; this
+ * field is updated by the completed* APIs.
+ *
+ * [sq_head, cq_sq_head): the SQEs that hardware has already completed.
+ * [cq_sq_head, sq_tail): the SQEs that hardware is still processing.
+ */
+ uint16_t sq_head;
+ uint16_t sq_tail;
+ uint16_t cq_sq_head;
+ uint16_t avail_sqes;
+
uint16_t cq_depth; /**< CQ depth, inited when created. */
+ uint16_t cq_head; /**< CQ index for next scans. */
+ uint16_t cqs_completed; /**< accumulated number of completed CQEs. */
+ uint8_t cqe_vld; /**< expected CQE valid bit; toggles on every round. */
+
+ uint64_t submitted;
+ uint64_t completed;
+ uint64_t errors;
+ uint64_t invalid_lens;
+ uint64_t io_errors;
+ uint64_t qfulls;
/**
* The following fields are not accessed in the I/O path, so they are
@@ -48,6 +89,7 @@ struct acc_dma_dev {
void *dus_base;
uint32_t sqe_size;
uint16_t sq_depth;
+ bool started;
};
#endif /* ACC_DMADEV_H */
--
2.17.1
* [PATCH 3/4] dma/acc: add data path ops
From: Chengwen Feng @ 2025-08-27 9:27 UTC
To: thomas, liuyonglong; +Cc: dev
This commit adds data path ops for the accelerator DMA driver.
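From the application's view, the data path is exercised as below (a sketch;
dev_id and the vchan are configured as in the previous patch, the addresses
and length are illustrative):
  uint16_t last_idx, n;
  bool has_error = false;
  /* enqueue one copy; RTE_DMA_OP_FLAG_SUBMIT also rings the SQ doorbell */
  rte_dma_copy(dev_id, 0, src_iova, dst_iova, len, RTE_DMA_OP_FLAG_SUBMIT);
  /* later: reap completions (the CQ doorbell is paced internally) */
  n = rte_dma_completed(dev_id, 0, 32, &last_idx, &has_error);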
Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
drivers/dma/acc/acc_dmadev.c | 284 +++++++++++++++++++++++++++++++++++
drivers/dma/acc/acc_dmadev.h | 65 +++++++-
2 files changed, 347 insertions(+), 2 deletions(-)
diff --git a/drivers/dma/acc/acc_dmadev.c b/drivers/dma/acc/acc_dmadev.c
index ce2f45cedb..12201ba571 100644
--- a/drivers/dma/acc/acc_dmadev.c
+++ b/drivers/dma/acc/acc_dmadev.c
@@ -189,6 +189,284 @@ acc_dma_dump(const struct rte_dma_dev *dev, FILE *f)
return 0;
}
+static inline void
+acc_dma_sq_doorbell(struct acc_dma_dev *hw)
+{
+#define SQ_CMD 0ull
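+ /* doorbell word: SQ number in the low bits, command type at bit 12, new SQ tail from bit 32. */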
+ uint64_t doorbell = (uint64_t)hw->sqn | (SQ_CMD << 12) |
+ (((uint64_t)hw->sq_tail) << 32);
+ rte_io_wmb();
+ *(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+static int
+acc_dma_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,
+ uint32_t length, uint64_t flags)
+{
+ struct acc_dma_dev *hw = dev_private;
+ struct acc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+ RTE_SET_USED(vchan);
+
+ if (unlikely(length > ACC_DMA_MAX_OP_SIZE)) {
+ hw->invalid_lens++;
+ return -EINVAL;
+ }
+
+ if (unlikely(*hw->sq_status != 0)) {
+ hw->io_errors++;
+ return -EIO;
+ }
+
+ if (hw->avail_sqes == 0) {
+ hw->qfulls++;
+ return -ENOSPC;
+ }
+
+ sqe->bd_type = ACC_DMA_SQE_TYPE;
+ sqe->task_type = ACC_DMA_TASK_TYPE;
+ sqe->task_type_ext = ACC_DMA_DATA_MEMCPY;
+ sqe->init_val = 0;
+ sqe->addr_array = src;
+ sqe->dst_addr = dst;
+ sqe->data_size = length;
+ sqe->dw0 = ACC_DMA_SVA_PREFETCH_EN;
+
+ hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+ hw->avail_sqes--;
+ hw->submitted++;
+
+ if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+ acc_dma_sq_doorbell(hw);
+
+ return hw->ridx++;
+}
+
+static int
+acc_dma_fill(void *dev_private, uint16_t vchan, uint64_t pattern,
+ rte_iova_t dst, uint32_t length, uint64_t flags)
+{
+ struct acc_dma_dev *hw = dev_private;
+ struct acc_dma_sqe *sqe = &hw->sqe[hw->sq_tail];
+
+ RTE_SET_USED(vchan);
+
+ if (unlikely(length > ACC_DMA_MAX_OP_SIZE)) {
+ hw->invalid_lens++;
+ return -EINVAL;
+ }
+
+ if (unlikely(*hw->sq_status != 0)) {
+ hw->io_errors++;
+ return -EIO;
+ }
+
+ if (hw->avail_sqes == 0) {
+ hw->qfulls++;
+ return -ENOSPC;
+ }
+
+ sqe->bd_type = ACC_DMA_SQE_TYPE;
+ sqe->task_type = ACC_DMA_TASK_TYPE;
+ sqe->task_type_ext = ACC_DMA_DATA_MEMSET;
+ sqe->init_val = pattern;
+ sqe->addr_array = 0;
+ sqe->dst_addr = dst;
+ sqe->data_size = length;
+ sqe->dw0 = ACC_DMA_SVA_PREFETCH_EN;
+
+ hw->sq_tail = (hw->sq_tail + 1) & hw->sq_depth_mask;
+ hw->avail_sqes--;
+ hw->submitted++;
+
+ if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+ acc_dma_sq_doorbell(hw);
+
+ return hw->ridx++;
+}
+
+static int
+acc_dma_submit(void *dev_private, uint16_t vchan)
+{
+ struct acc_dma_dev *hw = dev_private;
+
+ RTE_SET_USED(vchan);
+
+ if (unlikely(*hw->sq_status != 0)) {
+ hw->io_errors++;
+ return -EIO;
+ }
+
+ acc_dma_sq_doorbell(hw);
+
+ return 0;
+}
+
+static inline void
+acc_dma_cq_doorbell(struct acc_dma_dev *hw)
+{
+#define CQ_CMD 1ull
+ uint64_t doorbell = (uint64_t)hw->sqn | (CQ_CMD << 12) |
+ (((uint64_t)hw->cq_head) << 32);
+ rte_io_wmb();
+ *(volatile uint64_t *)hw->doorbell_reg = doorbell;
+}
+
+static inline void
+acc_dma_scan_cq(struct acc_dma_dev *hw)
+{
+ volatile struct acc_dma_cqe *cqe;
+ struct acc_dma_sqe *sqe;
+ uint16_t csq_head = hw->cq_sq_head;
+ uint16_t cq_head = hw->cq_head;
+ uint16_t count = 0;
+ uint64_t misc;
+
+ if (unlikely(*hw->cq_status != 0)) {
+ hw->io_errors++;
+ return;
+ }
+
+ while (count < hw->cq_depth) {
+ cqe = &hw->cqe[cq_head];
+ misc = cqe->misc;
+ misc = rte_le_to_cpu_64(misc);
+ if (RTE_FIELD_GET64(ACC_DMA_CQE_VALID_B, misc) != hw->cqe_vld)
+ break;
+
+ csq_head = RTE_FIELD_GET64(ACC_DMA_SQ_HEAD_MASK, misc);
+ if (unlikely(csq_head > hw->sq_depth_mask)) {
+ /**
+ * Defensive programming to prevent overflow of the
+ * status array indexed by csq_head. Only an error
+ * log is emitted; no recovery is attempted.
+ */
+ ACC_DMA_ERR(hw, "invalid csq_head: %u!", csq_head);
+ count = 0;
+ break;
+ }
+ sqe = &hw->sqe[csq_head];
+ if (sqe->done_flag != ACC_DMA_TASK_DONE ||
+ sqe->err_type || sqe->ext_err_type || sqe->wtype) {
+ hw->status[csq_head] = RTE_DMA_STATUS_ERROR_UNKNOWN;
+ }
+
+ count++;
+ cq_head++;
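+ /* wrap the CQ and flip the expected valid bit for the next lap around the ring. */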
+ if (cq_head == hw->cq_depth) {
+ hw->cqe_vld = !hw->cqe_vld;
+ cq_head = 0;
+ }
+ }
+
+ if (count == 0)
+ return;
+
+ hw->cq_head = cq_head;
+ hw->cq_sq_head = (csq_head + 1) & hw->sq_depth_mask;
+ hw->avail_sqes += count;
+ hw->cqs_completed += count;
+ if (hw->cqs_completed >= ACC_DMA_CQ_DOORBELL_PACE) {
+ acc_dma_cq_doorbell(hw);
+ hw->cqs_completed = 0;
+ }
+}
+
+static inline uint16_t
+acc_dma_calc_cpls(struct acc_dma_dev *hw, const uint16_t nb_cpls)
+{
+ uint16_t cpl_num;
+
+ if (hw->cq_sq_head >= hw->sq_head)
+ cpl_num = hw->cq_sq_head - hw->sq_head;
+ else
+ cpl_num = hw->sq_depth_mask + 1 - hw->sq_head + hw->cq_sq_head;
+
+ if (cpl_num > nb_cpls)
+ cpl_num = nb_cpls;
+
+ return cpl_num;
+}
+
+static uint16_t
+acc_dma_completed(void *dev_private,
+ uint16_t vchan, const uint16_t nb_cpls,
+ uint16_t *last_idx, bool *has_error)
+{
+ struct acc_dma_dev *hw = dev_private;
+ uint16_t sq_head = hw->sq_head;
+ uint16_t cpl_num, i;
+
+ RTE_SET_USED(vchan);
+ acc_dma_scan_cq(hw);
+
+ cpl_num = acc_dma_calc_cpls(hw, nb_cpls);
+ for (i = 0; i < cpl_num; i++) {
+ if (hw->status[sq_head]) {
+ *has_error = true;
+ break;
+ }
+ sq_head = (sq_head + 1) & hw->sq_depth_mask;
+ }
+ *last_idx = hw->cridx + i - 1;
+ if (i > 0) {
+ hw->cridx += i;
+ hw->sq_head = sq_head;
+ hw->completed += i;
+ }
+
+ return i;
+}
+
+static uint16_t
+acc_dma_completed_status(void *dev_private,
+ uint16_t vchan, const uint16_t nb_cpls,
+ uint16_t *last_idx, enum rte_dma_status_code *status)
+{
+ struct acc_dma_dev *hw = dev_private;
+ uint16_t sq_head = hw->sq_head;
+ uint16_t cpl_num, i;
+
+ RTE_SET_USED(vchan);
+ acc_dma_scan_cq(hw);
+
+ cpl_num = acc_dma_calc_cpls(hw, nb_cpls);
+ for (i = 0; i < cpl_num; i++) {
+ status[i] = hw->status[sq_head];
+ hw->errors += !!status[i];
+ hw->status[sq_head] = 0;
+ sq_head = (sq_head + 1) & hw->sq_depth_mask;
+ }
+ *last_idx = hw->cridx + cpl_num - 1;
+ if (likely(cpl_num > 0)) {
+ hw->cridx += cpl_num;
+ hw->sq_head = sq_head;
+ hw->completed += cpl_num;
+ }
+
+ return cpl_num;
+}
+
+static uint16_t
+acc_dma_burst_capacity(const void *dev_private, uint16_t vchan)
+{
+ const struct acc_dma_dev *hw = dev_private;
+ RTE_SET_USED(vchan);
+ return hw->avail_sqes;
+}
+
+static const struct rte_dma_dev_ops acc_dmadev_ops = {
+ .dev_info_get = acc_dma_info_get,
+ .dev_configure = acc_dma_configure,
+ .dev_start = acc_dma_start,
+ .dev_stop = acc_dma_stop,
+ .dev_close = acc_dma_close,
+ .vchan_setup = acc_dma_vchan_setup,
+ .stats_get = acc_dma_stats_get,
+ .stats_reset = acc_dma_stats_reset,
+ .dev_dump = acc_dma_dump,
+};
+
static void
acc_dma_gen_dev_name(const struct rte_uacce_device *uacce_dev,
uint16_t queue_id, char *dev_name, size_t size)
@@ -261,6 +539,12 @@ acc_dma_create(struct rte_uacce_device *uacce_dev, uint16_t queue_id)
dev->device = &uacce_dev->device;
dev->dev_ops = &acc_dmadev_ops;
dev->fp_obj->dev_private = dev->data->dev_private;
+ dev->fp_obj->copy = acc_dma_copy;
+ dev->fp_obj->fill = acc_dma_fill;
+ dev->fp_obj->submit = acc_dma_submit;
+ dev->fp_obj->completed = acc_dma_completed;
+ dev->fp_obj->completed_status = acc_dma_completed_status;
+ dev->fp_obj->burst_capacity = acc_dma_burst_capacity;
hw = dev->data->dev_private;
hw->data = dev->data; /* make sure ACC_DMA_DEBUG/INFO/WARN/ERR are usable. */
diff --git a/drivers/dma/acc/acc_dmadev.h b/drivers/dma/acc/acc_dmadev.h
index b87626c244..2055e968f6 100644
--- a/drivers/dma/acc/acc_dmadev.h
+++ b/drivers/dma/acc/acc_dmadev.h
@@ -24,8 +24,69 @@ struct acc_dma_config {
uint16_t avail_queues;
};
-struct acc_dma_sqe {};
-struct acc_dma_cqe {};
+#define ACC_DMA_TASK_TYPE 0x3
+#define ACC_DMA_SQE_TYPE 0x1
+#define ACC_DMA_SVA_PREFETCH_EN RTE_BIT32(15)
+#define ACC_DMA_MAX_OP_SIZE (RTE_BIT32(24) - 1) /* 16M - 1 bytes */
+
+enum {
+ ACC_DMA_DATA_MEMCPY = 0,
+ ACC_DMA_DATA_MEMSET = 7,
+};
+
+enum {
+ ACC_DMA_TASK_DONE = 1,
+ ACC_DMA_TASK_ERROR,
+};
+
+struct acc_dma_sqe {
+ uint32_t bd_type : 6;
+ uint32_t resv1 : 2;
+ uint32_t task_type : 6;
+ uint32_t resv2 : 2;
+ uint32_t task_type_ext : 6;
+ uint32_t resv3 : 9;
+ uint32_t bd_invalid : 1;
+ uint32_t rsv4[2];
+ uint32_t low_tag;
+ uint32_t hi_tag;
+ /* The number of bytes to be copied or filled for a single address task. */
+ uint32_t data_size;
+ uint32_t rsv5;
+ /*
+ * bits 0~13: reserved;
+ * bit 14: single address or multiple addresses;
+ * bit 15: SVA prefetch enable.
+ */
+ uint16_t dw0;
+ /*
+ * bits 0~5: reserved;
+ * bits 6~13: address count;
+ * bits 14~15: reserved.
+ */
+ uint16_t dw1;
+ uint64_t init_val;
+ uint32_t rsv6[12];
+ /* dst addr for single address task. */
+ uint64_t dst_addr;
+ uint32_t rsv7[2];
+ /* src addr for single address task, addr array for multi addresses. */
+ uint64_t addr_array;
+ uint32_t done_flag : 3;
+ uint32_t rsv8 : 1;
+ uint32_t ext_err_type : 12;
+ uint32_t err_type : 8;
+ uint32_t wtype : 8;
+ uint32_t rsv9[3];
+};
+
+#define ACC_DMA_SQ_HEAD_MASK RTE_GENMASK64(15, 0)
+#define ACC_DMA_CQE_VALID_B RTE_BIT64(48)
+
+struct acc_dma_cqe {
+ uint64_t rsv;
+ uint64_t misc;
+};
struct acc_dma_dev {
struct acc_dma_sqe *sqe;
--
2.17.1
* [PATCH 4/4] dma/acc: add doc
From: Chengwen Feng @ 2025-08-27 9:27 UTC
To: thomas, liuyonglong; +Cc: dev
This commit adds documentation for the accelerator DMA driver.
Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
MAINTAINERS | 1 +
doc/guides/dmadevs/acc.rst | 63 ++++++++++++++++++++++++++
doc/guides/dmadevs/index.rst | 1 +
doc/guides/rel_notes/release_25_11.rst | 6 +++
4 files changed, 71 insertions(+)
create mode 100644 doc/guides/dmadevs/acc.rst
diff --git a/MAINTAINERS b/MAINTAINERS
index 42717363a0..ca3a8a421b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1366,6 +1366,7 @@ F: doc/guides/dmadevs/hisilicon.rst
HiSilicon Accelerator DMA
M: Chengwen Feng <fengchengwen@huawei.com>
F: drivers/dma/acc/
+F: doc/guides/dmadevs/acc.rst
Marvell CNXK DPI DMA
M: Vamsi Attunuru <vattunuru@marvell.com>
diff --git a/doc/guides/dmadevs/acc.rst b/doc/guides/dmadevs/acc.rst
new file mode 100644
index 0000000000..41fc24877f
--- /dev/null
+++ b/doc/guides/dmadevs/acc.rst
@@ -0,0 +1,63 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+ Copyright (c) 2025 HiSilicon Technologies Co., Ltd. All rights reserved.
+
+HISILICON Accelerator DMA Driver
+================================
+
+Kunpeng SoC has an internal accelerator unit which includes a zip engine,
+and the zip engine also supports data copy and fill operations. This driver
+exposes this capability to DPDK applications.
+
+
+Supported Kunpeng SoCs
+----------------------
+
+* Kunpeng 920
+
+
+Device Setup
+-------------
+
+In order to use the device in DPDK, users should load the uacce.ko, hisi_qm.ko
+and hisi_zip.ko modules (the latter with module parameter uacce_mode=1). After
+that, several subdirectories whose names start with hisi_zip appear in the
+/sys/class/uacce/ directory.
+
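+For example (module paths are illustrative)::
+
+ insmod uacce.ko
+ insmod hisi_qm.ko
+ insmod hisi_zip.ko uacce_mode=1
+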
+Device Probing and Initialization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Users should use the following method to probe the device::
+
+ $ dpdk-app -a uacce:hisi_zip-0,queues=2 ...
+
+hisi_zip-0 is a directory name under /sys/class/uacce/, and queues is a
+runtime config parameter which indicates how many dmadevs to create.
+
+If the probe is successful, two dmadevs are created, named "hisi_zip-0-dma0"
+and "hisi_zip-0-dma1".
+
+.. note::
+ In the /sys/class/uacce/hisi_zip-x/ directory, users can query the api and
+ algorithms attributes. This driver only matches devices whose api is
+ hisi_qm_v5 and whose algorithms contain udma.
+
+Device Configuration
+~~~~~~~~~~~~~~~~~~~~~
+
+Configuration requirements:
+
+* The ``ring_size`` is obtained from the UACCE API and is a fixed value.
+* Only one ``vchan`` is supported per ``dmadev``.
+* Silent mode is not supported.
+* The transfer direction must be set to ``RTE_DMA_DIR_MEM_TO_MEM``.
+
+
+Device Datapath Capability and Limitation
+-----------------------------------------
+
+Memory copy and fill operations are supported.
+
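+A minimal fill-and-reap sketch (``dev_id`` and the vchan are configured as
+described above; the pattern, destination address and length are
+illustrative)::
+
+ enum rte_dma_status_code status;
+ uint16_t last_idx;
+ rte_dma_fill(dev_id, 0, 0x0, dst_iova, len, RTE_DMA_OP_FLAG_SUBMIT);
+ while (rte_dma_completed_status(dev_id, 0, 1, &last_idx, &status) == 0)
+ ;
+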
+.. note::
+ Currently, the maximum size of one operation is limited to 16MB-1B by the
+ driver. The device actually supports larger operations, but that would
+ require more complex handling in the driver's datapath. If you have such
+ a requirement, please contact the maintainers.
diff --git a/doc/guides/dmadevs/index.rst b/doc/guides/dmadevs/index.rst
index 15ddaf5192..dcc8c189ba 100644
--- a/doc/guides/dmadevs/index.rst
+++ b/doc/guides/dmadevs/index.rst
@@ -11,6 +11,7 @@ an application through DMA API.
:maxdepth: 1
:numbered:
+ acc
cnxk
dpaa
dpaa2
diff --git a/doc/guides/rel_notes/release_25_11.rst b/doc/guides/rel_notes/release_25_11.rst
index ccad6d89ff..977f4c7e43 100644
--- a/doc/guides/rel_notes/release_25_11.rst
+++ b/doc/guides/rel_notes/release_25_11.rst
@@ -55,6 +55,12 @@ New Features
Also, make sure to start the actual text at the margin.
=======================================================
+ * **Added HiSilicon accelerator DMA driver.**
+
+ Kunpeng SoC has an internal accelerator unit which includes a zip engine,
+ and the zip engine also supports data copy and fill operations. This
+ driver exposes this capability to DPDK applications.
+
Removed Items
-------------
--
2.17.1