DPDK patches and discussions
From: Anatoly Burakov <anatoly.burakov@intel.com>
To: dev@dpdk.org, Chengwen Feng <fengchengwen@huawei.com>,
	Kevin Laatz <kevin.laatz@intel.com>,
	Bruce Richardson <bruce.richardson@intel.com>
Cc: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Subject: [PATCH v1 2/3] dma/idxd: implement inter-domain operations
Date: Fri, 11 Aug 2023 16:14:45 +0000
Message-ID: <10660b2852115b92ccc6cc193c5b693183217a80.1691768110.git.anatoly.burakov@intel.com>
In-Reply-To: <cover.1691768109.git.anatoly.burakov@intel.com>

Implement the inter-domain copy and fill operations defined in the
newly added DMA device API. Support is detected from the device's
gen_cap register at probe time and advertised via the
RTE_DMA_CAPA_OPS_INTER_DOM capability; inter-domain descriptors are
submitted with ENQCMD rather than MOVDIR64B.
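
For reference, the expected application-level flow is sketched below.
This is an illustrative sketch only: it assumes the
rte_dma_copy_inter_dom() API proposed in patch 1/3 of this series, and
a src_handle value the participating processes have already exchanged
(e.g. via the driver-private window API from patch 3/3):

    /* skip devices that do not advertise inter-domain support */
    struct rte_dma_info info;
    if (rte_dma_info_get(dev_id, &info) < 0 ||
            (info.dev_capa & RTE_DMA_CAPA_OPS_INTER_DOM) == 0)
        return -ENOTSUP;

    /* copy from another process's address space into our own */
    ret = rte_dma_copy_inter_dom(dev_id, vchan, src_addr, dst_addr,
            length, src_handle, 0 /* no destination handle */,
            RTE_DMA_OP_FLAG_SRC_HANDLE | RTE_DMA_OP_FLAG_SUBMIT);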

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---
 doc/guides/prog_guide/dmadev.rst |  15 ++++
 drivers/dma/idxd/idxd_bus.c      |  36 ++++++++++
 drivers/dma/idxd/idxd_common.c   | 127 ++++++++++++++++++++++++++----
 drivers/dma/idxd/idxd_hw_defs.h  |  14 +++-
 drivers/dma/idxd/idxd_internal.h |   7 ++
 5 files changed, 181 insertions(+), 18 deletions(-)

diff --git a/doc/guides/prog_guide/dmadev.rst b/doc/guides/prog_guide/dmadev.rst
index e4e5196416..c2a957e971 100644
--- a/doc/guides/prog_guide/dmadev.rst
+++ b/doc/guides/prog_guide/dmadev.rst
@@ -126,6 +126,21 @@ destination PASID to perform the operation. When `src_handle` value is set,
 Currently, source and destination handles are opaque values the user has to get
 from private APIs of those DMA device drivers that support the operation.
 
+List of drivers supporting inter-domain operations:
+
+- Intel(R) IDXD driver
+
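+As an illustration, a minimal inter-domain copy might look as follows
+(a sketch, assuming the ``rte_dma_copy_inter_dom()`` API added in this
+patch series, with handle values already exchanged by the processes):
+
+.. code-block:: C
+
+   /* enqueue a copy across process address spaces and submit it */
+   ret = rte_dma_copy_inter_dom(dev_id, vchan, src_addr, dst_addr,
+           length, src_handle, dst_handle,
+           RTE_DMA_OP_FLAG_SRC_HANDLE | RTE_DMA_OP_FLAG_DST_HANDLE |
+           RTE_DMA_OP_FLAG_SUBMIT);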
 
 Querying Device Statistics
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/drivers/dma/idxd/idxd_bus.c b/drivers/dma/idxd/idxd_bus.c
index 3b2d4c2b65..787bc4e2d7 100644
--- a/drivers/dma/idxd/idxd_bus.c
+++ b/drivers/dma/idxd/idxd_bus.c
@@ -7,6 +7,7 @@
 #include <unistd.h>
 #include <sys/mman.h>
 #include <libgen.h>
+#include <inttypes.h>
 
 #include <bus_driver.h>
 #include <dev_driver.h>
@@ -187,6 +188,32 @@ read_wq_int(struct rte_dsa_device *dev, const char *filename,
 	return ret;
 }
 
+static int
+read_gen_cap(struct rte_dsa_device *dev, uint64_t *gen_cap)
+{
+	char sysfs_node[PATH_MAX];
+	FILE *f;
+
+	snprintf(sysfs_node, sizeof(sysfs_node), "%s/dsa%d/gen_cap",
+		dsa_get_sysfs_path(), dev->addr.device_id);
+	f = fopen(sysfs_node, "r");
+	if (f == NULL) {
+		IDXD_PMD_ERR("%s(): opening file '%s' failed: %s",
+				__func__, sysfs_node, strerror(errno));
+		return -1;
+	}
+
+	if (fscanf(f, "%" PRIx64, gen_cap) != 1) {
+		IDXD_PMD_ERR("%s(): error reading file '%s': %s",
+				__func__, sysfs_node, strerror(errno));
+		fclose(f);
+		return -1;
+	}
+
+	fclose(f);
+	return 0;
+}
+
 static int
 read_device_int(struct rte_dsa_device *dev, const char *filename,
 		int *value)
@@ -219,6 +245,7 @@ idxd_probe_dsa(struct rte_dsa_device *dev)
 {
 	struct idxd_dmadev idxd = {0};
 	int ret = 0;
+	uint64_t gen_cap;
 
 	IDXD_PMD_INFO("Probing device %s on numa node %d",
 			dev->wq_name, dev->device.numa_node);
@@ -232,6 +259,14 @@ idxd_probe_dsa(struct rte_dsa_device *dev)
 	idxd.u.bus.dsa_id = dev->addr.device_id;
 	idxd.sva_support = 1;
 
+	ret = read_gen_cap(dev, &gen_cap);
+	if (ret) {
+		IDXD_PMD_ERR("Failed to read gen_cap for %s", dev->wq_name);
+		return ret;
+	}
+	if (gen_cap & IDXD_INTERDOM_SUPPORT)
+		idxd.inter_dom_support = 1;
+
 	idxd.portal = idxd_bus_mmap_wq(dev);
 	if (idxd.portal == NULL) {
 		IDXD_PMD_ERR("WQ mmap failed");
diff --git a/drivers/dma/idxd/idxd_common.c b/drivers/dma/idxd/idxd_common.c
index 83d53942eb..ffe8614d16 100644
--- a/drivers/dma/idxd/idxd_common.c
+++ b/drivers/dma/idxd/idxd_common.c
@@ -41,7 +41,61 @@ __idxd_movdir64b(volatile void *dst, const struct idxd_hw_desc *src)
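+/* Unlike MOVDIR64B, ENQCMD carries the submitter's PASID (read from the
+ * IA32_PASID MSR) along with the enqueue store; this is presumably why the
+ * inter-domain descriptors below must be submitted with ENQCMD.
+ */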
 
 __use_avx2
 static __rte_always_inline void
-__submit(struct idxd_dmadev *idxd)
+__idxd_enqcmd(volatile void *dst, const struct idxd_hw_desc *src)
+{
+	asm volatile (".byte 0xf2, 0x0f, 0x38, 0xf8, 0x02"
+			:
+			: "a" (dst), "d" (src)
+			: "memory");
+}
+
+static inline uint32_t
+__idxd_get_inter_dom_flags(const enum rte_idxd_ops op)
+{
+	switch (op) {
+	case idxd_op_memmove:
+		return IDXD_FLAG_SRC_ALT_PASID | IDXD_FLAG_DST_ALT_PASID;
+	case idxd_op_fill:
+		return IDXD_FLAG_DST_ALT_PASID;
+	default:
+		/* no flags needed */
+		return 0;
+	}
+}
+
+static inline uint32_t
+__idxd_get_op_flags(enum rte_idxd_ops op, uint64_t flags, bool inter_dom)
+{
+	uint32_t op_flags = op;
+	uint32_t flag_mask = IDXD_FLAG_FENCE;
+	if (inter_dom) {
+		flag_mask |= __idxd_get_inter_dom_flags(op);
+		op_flags |= idxd_op_inter_dom;
+	}
+	op_flags = op_flags << IDXD_CMD_OP_SHIFT;
+	return op_flags | (flags & flag_mask) | IDXD_FLAG_CACHE_CONTROL;
+}
+
+static inline uint64_t
+__idxd_get_alt_pasid(uint64_t flags, uint64_t src_idpte_id,
+		uint64_t dst_idpte_id)
+{
+	/* hardware requires that unused handle fields be zero */
+	if (!(flags & RTE_DMA_OP_FLAG_SRC_HANDLE))
+		src_idpte_id = 0;
+	if (!(flags & RTE_DMA_OP_FLAG_DST_HANDLE))
+		dst_idpte_id = 0;
+	return (src_idpte_id << IDXD_CMD_SRC_IDPTE_IDX_SHIFT) |
+			(dst_idpte_id << IDXD_CMD_DST_IDPTE_IDX_SHIFT);
+}
+
+__use_avx2
+static __rte_always_inline void
+__submit(struct idxd_dmadev *idxd, const bool use_enqcmd)
 {
 	rte_prefetch1(&idxd->batch_comp_ring[idxd->batch_idx_read]);
 
@@ -59,7 +109,10 @@ __submit(struct idxd_dmadev *idxd)
 		desc.completion = comp_addr;
 		desc.op_flags |= IDXD_FLAG_REQUEST_COMPLETION;
 		_mm_sfence(); /* fence before writing desc to device */
-		__idxd_movdir64b(idxd->portal, &desc);
+		if (use_enqcmd)
+			__idxd_enqcmd(idxd->portal, &desc);
+		else
+			__idxd_movdir64b(idxd->portal, &desc);
 	} else {
 		const struct idxd_hw_desc batch_desc = {
 				.op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |
@@ -71,7 +124,10 @@ __submit(struct idxd_dmadev *idxd)
 				.size = idxd->batch_size,
 		};
 		_mm_sfence(); /* fence before writing desc to device */
-		__idxd_movdir64b(idxd->portal, &batch_desc);
+		if (use_enqcmd)
+			__idxd_enqcmd(idxd->portal, &batch_desc);
+		else
+			__idxd_movdir64b(idxd->portal, &batch_desc);
 	}
 
 	if (++idxd->batch_idx_write > idxd->max_batches)
@@ -93,7 +149,9 @@ __idxd_write_desc(struct idxd_dmadev *idxd,
 		const rte_iova_t src,
 		const rte_iova_t dst,
 		const uint32_t size,
-		const uint32_t flags)
+		const uint32_t flags,
+		const uint64_t alt_pasid,
+		const bool use_enqcmd)
 {
 	uint16_t mask = idxd->desc_ring_mask;
 	uint16_t job_id = idxd->batch_start + idxd->batch_size;
@@ -113,14 +171,14 @@ __idxd_write_desc(struct idxd_dmadev *idxd,
 	_mm256_store_si256((void *)&idxd->desc_ring[write_idx],
 			_mm256_set_epi64x(dst, src, comp_addr, op_flags64));
 	_mm256_store_si256((void *)&idxd->desc_ring[write_idx].size,
-			_mm256_set_epi64x(0, 0, 0, size));
+			_mm256_set_epi64x(alt_pasid, 0, 0, size));
 
 	idxd->batch_size++;
 
 	rte_prefetch0_write(&idxd->desc_ring[write_idx + 1]);
 
 	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
-		__submit(idxd);
+		__submit(idxd, use_enqcmd);
 
 	return job_id;
 }
@@ -134,10 +192,26 @@ idxd_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
 	 * but check it at compile time to be sure.
 	 */
 	RTE_BUILD_BUG_ON(RTE_DMA_OP_FLAG_FENCE != IDXD_FLAG_FENCE);
-	uint32_t memmove = (idxd_op_memmove << IDXD_CMD_OP_SHIFT) |
-			IDXD_FLAG_CACHE_CONTROL | (flags & IDXD_FLAG_FENCE);
-	return __idxd_write_desc(dev_private, memmove, src, dst, length,
-			flags);
+	uint32_t op_flags = __idxd_get_op_flags(idxd_op_memmove, flags, false);
+	return __idxd_write_desc(dev_private, op_flags, src, dst, length,
+			flags, 0, false);
+}
+
+__use_avx2
+int
+idxd_enqueue_copy_inter_dom(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
+		rte_iova_t dst, unsigned int length,
+		uint16_t src_idpte_id, uint16_t dst_idpte_id, uint64_t flags)
+{
+	/* we can take advantage of the fact that the fence flag is the same
+	 * in dmadev and DSA, but check it at compile time to be sure.
+	 */
+	RTE_BUILD_BUG_ON(RTE_DMA_OP_FLAG_FENCE != IDXD_FLAG_FENCE);
+	uint32_t op_flags = __idxd_get_op_flags(idxd_op_memmove, flags, true);
+	uint64_t alt_pasid = __idxd_get_alt_pasid(flags, src_idpte_id, dst_idpte_id);
+	/* currently, we require inter-domain copies to use enqcmd */
+	return __idxd_write_desc(dev_private, op_flags, src, dst, length,
+			flags, alt_pasid, true);
 }
 
 __use_avx2
@@ -145,17 +219,28 @@ int
 idxd_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern,
 		rte_iova_t dst, unsigned int length, uint64_t flags)
 {
-	uint32_t fill = (idxd_op_fill << IDXD_CMD_OP_SHIFT) |
-			IDXD_FLAG_CACHE_CONTROL | (flags & IDXD_FLAG_FENCE);
-	return __idxd_write_desc(dev_private, fill, pattern, dst, length,
-			flags);
+	uint32_t op_flags = __idxd_get_op_flags(idxd_op_fill, flags, false);
+	return __idxd_write_desc(dev_private, op_flags, pattern, dst, length,
+			flags, 0, false);
+}
+
+__use_avx2
+int
+idxd_enqueue_fill_inter_dom(void *dev_private, uint16_t qid __rte_unused,
+		uint64_t pattern, rte_iova_t dst, unsigned int length,
+		uint16_t dst_idpte_id, uint64_t flags)
+{
+	uint32_t op_flags = __idxd_get_op_flags(idxd_op_fill, flags, true);
+	uint64_t alt_pasid = __idxd_get_alt_pasid(flags, 0, dst_idpte_id);
+	return __idxd_write_desc(dev_private, op_flags, pattern, dst, length,
+			flags, alt_pasid, true);
 }
 
 __use_avx2
 int
 idxd_submit(void *dev_private, uint16_t qid __rte_unused)
 {
-	__submit(dev_private);
+	__submit(dev_private, false);
 	return 0;
 }
 
@@ -490,6 +575,12 @@ idxd_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *info, uint32_t
 	};
 	if (idxd->sva_support)
 		info->dev_capa |= RTE_DMA_CAPA_SVA;
+
+	if (idxd->inter_dom_support) {
+		info->dev_capa |= RTE_DMA_CAPA_OPS_INTER_DOM;
+		info->controller_id = idxd->u.bus.dsa_id;
+	}
+
 	return 0;
 }
 
@@ -600,6 +691,8 @@ idxd_dmadev_create(const char *name, struct rte_device *dev,
 	dmadev->fp_obj->completed_status = idxd_completed_status;
 	dmadev->fp_obj->burst_capacity = idxd_burst_capacity;
 	dmadev->fp_obj->dev_private = dmadev->data->dev_private;
+	dmadev->fp_obj->copy_inter_dom = idxd_enqueue_copy_inter_dom;
+	dmadev->fp_obj->fill_inter_dom = idxd_enqueue_fill_inter_dom;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
diff --git a/drivers/dma/idxd/idxd_hw_defs.h b/drivers/dma/idxd/idxd_hw_defs.h
index a38540f283..441e9d29a4 100644
--- a/drivers/dma/idxd/idxd_hw_defs.h
+++ b/drivers/dma/idxd/idxd_hw_defs.h
@@ -9,18 +9,24 @@
  * Defines used in the data path for interacting with IDXD hardware.
  */
 #define IDXD_CMD_OP_SHIFT 24
+#define IDXD_CMD_SRC_IDPTE_IDX_SHIFT 32
+#define IDXD_CMD_DST_IDPTE_IDX_SHIFT 48
 enum rte_idxd_ops {
 	idxd_op_nop = 0,
 	idxd_op_batch,
 	idxd_op_drain,
 	idxd_op_memmove,
-	idxd_op_fill
+	idxd_op_fill,
+	idxd_op_inter_dom = 0x20
 };
 
 #define IDXD_FLAG_FENCE                 (1 << 0)
 #define IDXD_FLAG_COMPLETION_ADDR_VALID (1 << 2)
 #define IDXD_FLAG_REQUEST_COMPLETION    (1 << 3)
+#define IDXD_INTERDOM_SUPPORT           (1 << 6)
 #define IDXD_FLAG_CACHE_CONTROL         (1 << 8)
+#define IDXD_FLAG_SRC_ALT_PASID         (1 << 16)
+#define IDXD_FLAG_DST_ALT_PASID         (1 << 17)
 
 /**
  * Hardware descriptor used by DSA hardware, for both bursts and
@@ -42,8 +48,10 @@ struct idxd_hw_desc {
 
 	uint16_t intr_handle; /* completion interrupt handle */
 
-	/* remaining 26 bytes are reserved */
-	uint16_t reserved[13];
+	/* next 22 bytes are reserved */
+	uint16_t reserved[11];
+	uint16_t src_pasid_hndl;  /* pasid handle for source */
+	uint16_t dest_pasid_hndl; /* pasid handle for destination */
 } __rte_aligned(64);
 
 #define IDXD_COMP_STATUS_INCOMPLETE        0
diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
index cd4177721d..fb999d29f7 100644
--- a/drivers/dma/idxd/idxd_internal.h
+++ b/drivers/dma/idxd/idxd_internal.h
@@ -70,6 +70,7 @@ struct idxd_dmadev {
 	struct rte_dma_dev *dmadev;
 	struct rte_dma_vchan_conf qcfg;
 	uint8_t sva_support;
+	uint8_t inter_dom_support;
 	uint8_t qid;
 
 	union {
@@ -92,8 +93,14 @@ int idxd_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *dev_info,
 		uint32_t size);
 int idxd_enqueue_copy(void *dev_private, uint16_t qid, rte_iova_t src,
 		rte_iova_t dst, unsigned int length, uint64_t flags);
+int idxd_enqueue_copy_inter_dom(void *dev_private, uint16_t qid, rte_iova_t src,
+		rte_iova_t dst, unsigned int length,
+		uint16_t src_idpte_id, uint16_t dst_idpte_id, uint64_t flags);
 int idxd_enqueue_fill(void *dev_private, uint16_t qid, uint64_t pattern,
 		rte_iova_t dst, unsigned int length, uint64_t flags);
+int idxd_enqueue_fill_inter_dom(void *dev_private, uint16_t qid, uint64_t pattern,
+		rte_iova_t dst, unsigned int length, uint16_t dst_idpte_id,
+		uint64_t flags);
 int idxd_submit(void *dev_private, uint16_t qid);
 uint16_t idxd_completed(void *dev_private, uint16_t qid, uint16_t max_ops,
 		uint16_t *last_idx, bool *has_error);
-- 
2.37.2


