DPDK patches and discussions
 help / color / mirror / Atom feed
From: "Burakov, Anatoly" <anatoly.burakov@intel.com>
To: "Burakov, Anatoly" <anatoly.burakov@intel.com>,
	"dev@dpdk.org" <dev@dpdk.org>
Subject: Re: [dpdk-dev] [PATCH v3] vfio: Support for no-IOMMU mode
Date: Wed, 27 Jan 2016 14:23:57 +0000	[thread overview]
Message-ID: <C6ECDF3AB251BE4894318F4E45123697820C5098@IRSMSX109.ger.corp.intel.com> (raw)
In-Reply-To: <1453903474-18807-1-git-send-email-anatoly.burakov@intel.com>

Apologies, I lost the sign-off from Santosh Shukla, and the commit message still mentions a file that no longer exists, so I'll submit a v4.

Thanks,
Anatoly


> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Anatoly Burakov
> Sent: Wednesday, January 27, 2016 2:05 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v3] vfio: Support for no-IOMMU mode
> 
> This commit is adding a generic mechanism to support multiple IOMMU
> types. For now, it's only type 1 (x86 IOMMU) and no-IOMMU (a special VFIO
> mode that doesn't use IOMMU at all), but it's easily extended by adding
> necessary definitions into eal_pci_init.h and a DMA mapping function to
> eal_pci_vfio_dma.c.
> 
> Since type 1 IOMMU module is no longer necessary to have VFIO, we fix the
> module check to check for vfio-pci instead. It's not ideal and triggers VFIO
> checks more often (and thus produces more error output, which was the
> reason behind the module check in the first place), so we compensate for
> that by providing more verbose logging, indicating whether VFIO initialization
> has succeeded or failed.
> 
> Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
> Tested-by: Santosh Shukla <sshukla@mvista.com>
> ---
> v3 changes:
>   Merging DMA mapping functions back into eal_pci_vfio.c
>   Fixing and adding comments
> 
> v2 changes:
>   Compile fix (hat-tip to Santosh Shukla)
>   Tested-by is provisional, since only superficial testing was done
> 
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 205 +++++++++++++++++++++--
> ------
>  lib/librte_eal/linuxapp/eal/eal_vfio.h     |   5 +
>  2 files changed, 157 insertions(+), 53 deletions(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index 74f91ba..fdf334b 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -72,11 +72,74 @@ EAL_REGISTER_TAILQ(rte_vfio_tailq)
>  #define VFIO_DIR "/dev/vfio"
>  #define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
>  #define VFIO_GROUP_FMT "/dev/vfio/%u"
> +#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
>  #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
> 
>  /* per-process VFIO config */
>  static struct vfio_config vfio_cfg;
> 
> +/* DMA mapping function prototype.
> + * Takes VFIO container fd as a parameter.
> + * Returns 0 on success, -1 on error.
> + * */
> +typedef  int (*vfio_dma_func_t)(int);
> +
> +struct vfio_iommu_type {
> +	int type_id;
> +	const char *name;
> +	vfio_dma_func_t dma_map_func;
> +};
> +
> +int vfio_iommu_type1_dma_map(int);
> +int vfio_iommu_noiommu_dma_map(int);
> +
> +/* IOMMU types we support */
> +static const struct vfio_iommu_type iommu_types[] = {
> +		/* x86 IOMMU, otherwise known as type 1 */
> +		{ VFIO_TYPE1_IOMMU, "Type 1",
> &vfio_iommu_type1_dma_map},
> +		/* IOMMU-less mode */
> +		{ VFIO_NOIOMMU_IOMMU, "No-IOMMU",
> &vfio_iommu_noiommu_dma_map}, };
> +
> +int
> +vfio_iommu_type1_dma_map(int vfio_container_fd) {
> +	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
> +	int i, ret;
> +
> +	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
> +	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
> +		struct vfio_iommu_type1_dma_map dma_map;
> +
> +		if (ms[i].addr == NULL)
> +			break;
> +
> +		memset(&dma_map, 0, sizeof(dma_map));
> +		dma_map.argsz = sizeof(struct
> vfio_iommu_type1_dma_map);
> +		dma_map.vaddr = ms[i].addr_64;
> +		dma_map.size = ms[i].len;
> +		dma_map.iova = ms[i].phys_addr;
> +		dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
> VFIO_DMA_MAP_FLAG_WRITE;
> +
> +		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA,
> &dma_map);
> +
> +		if (ret) {
> +			RTE_LOG(ERR, EAL, "  cannot set up DMA remapping,
> "
> +					"error %i (%s)\n", errno,
> strerror(errno));
> +			return -1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +int
> +vfio_iommu_noiommu_dma_map(int __rte_unused vfio_container_fd) {
> +	/* No-IOMMU mode does not need DMA mapping */
> +	return 0;
> +}
> +
>  int
>  pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
>  		    void *buf, size_t len, off_t offs) @@ -208,42 +271,58 @@
> pci_vfio_set_bus_master(int dev_fd)
>  	return 0;
>  }
> 
> -/* set up DMA mappings */
> -static int
> -pci_vfio_setup_dma_maps(int vfio_container_fd) -{
> -	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
> -	int i, ret;
> -
> -	ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
> -			VFIO_TYPE1_IOMMU);
> -	if (ret) {
> -		RTE_LOG(ERR, EAL, "  cannot set IOMMU type, "
> -				"error %i (%s)\n", errno, strerror(errno));
> -		return -1;
> +/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for
> +error */ static const struct vfio_iommu_type *
> +pci_vfio_set_iommu_type(int vfio_container_fd) {
> +	unsigned idx;
> +	for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
> +		const struct vfio_iommu_type *t = &iommu_types[idx];
> +
> +		int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
> +				t->type_id);
> +		if (!ret) {
> +			RTE_LOG(NOTICE, EAL, "  using IOMMU type %d
> (%s)\n",
> +					t->type_id, t->name);
> +			return t;
> +		}
> +		/* not an error, there may be more supported IOMMU types
> */
> +		RTE_LOG(DEBUG, EAL, "  set IOMMU type %d (%s) failed, "
> +				"error %i (%s)\n", t->type_id, t->name,
> errno,
> +				strerror(errno));
>  	}
> +	/* if we didn't find a suitable IOMMU type, fail */
> +	return NULL;
> +}
> 
> -	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
> -	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
> -		struct vfio_iommu_type1_dma_map dma_map;
> -
> -		if (ms[i].addr == NULL)
> -			break;
> -
> -		memset(&dma_map, 0, sizeof(dma_map));
> -		dma_map.argsz = sizeof(struct
> vfio_iommu_type1_dma_map);
> -		dma_map.vaddr = ms[i].addr_64;
> -		dma_map.size = ms[i].len;
> -		dma_map.iova = ms[i].phys_addr;
> -		dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
> VFIO_DMA_MAP_FLAG_WRITE;
> -
> -		ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA,
> &dma_map);
> +/* check if we have any supported extensions */ static int
> +pci_vfio_has_supported_extensions(int vfio_container_fd) {
> +	int ret;
> +	unsigned idx, n_extensions = 0;
> +	for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
> +		const struct vfio_iommu_type *t = &iommu_types[idx];
> 
> -		if (ret) {
> -			RTE_LOG(ERR, EAL, "  cannot set up DMA remapping,
> "
> -					"error %i (%s)\n", errno,
> strerror(errno));
> +		ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
> +				t->type_id);
> +		if (ret < 0) {
> +			RTE_LOG(ERR, EAL, "  could not get IOMMU type, "
> +				"error %i (%s)\n", errno,
> +				strerror(errno));
> +			close(vfio_container_fd);
>  			return -1;
> +		} else if (ret == 1) {
> +			/* we found a supported extension */
> +			n_extensions++;
>  		}
> +		RTE_LOG(DEBUG, EAL, "  IOMMU type %d (%s) is %s\n",
> +				t->type_id, t->name,
> +				ret ? "supported" : "not supported");
> +	}
> +
> +	/* if we didn't find any supported IOMMU types, fail */
> +	if (!n_extensions) {
> +		close(vfio_container_fd);
> +		return -1;
>  	}
> 
>  	return 0;
> @@ -372,17 +451,10 @@ pci_vfio_get_container_fd(void)
>  			return -1;
>  		}
> 
> -		/* check if we support IOMMU type 1 */
> -		ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
> VFIO_TYPE1_IOMMU);
> -		if (ret != 1) {
> -			if (ret < 0)
> -				RTE_LOG(ERR, EAL, "  could not get IOMMU
> type, "
> -					"error %i (%s)\n", errno,
> -					strerror(errno));
> -			else
> -				RTE_LOG(ERR, EAL, "  unsupported IOMMU
> type "
> -					"detected in VFIO\n");
> -			close(vfio_container_fd);
> +		ret =
> pci_vfio_has_supported_extensions(vfio_container_fd);
> +		if (ret) {
> +			RTE_LOG(ERR, EAL, "  no supported IOMMU "
> +					"extensions found!\n");
>  			return -1;
>  		}
> 
> @@ -432,6 +504,7 @@ pci_vfio_get_group_fd(int iommu_group_no)
> 
>  	/* if primary, try to open the group */
>  	if (internal_config.process_type == RTE_PROC_PRIMARY) {
> +		/* try regular group format */
>  		snprintf(filename, sizeof(filename),
>  				 VFIO_GROUP_FMT, iommu_group_no);
>  		vfio_group_fd = open(filename, O_RDWR); @@ -442,7
> +515,20 @@ pci_vfio_get_group_fd(int iommu_group_no)
>  						strerror(errno));
>  				return -1;
>  			}
> -			return 0;
> +
> +			/* special case: try no-IOMMU path as well */
> +			snprintf(filename, sizeof(filename),
> +					VFIO_NOIOMMU_GROUP_FMT,
> iommu_group_no);
> +			vfio_group_fd = open(filename, O_RDWR);
> +			if (vfio_group_fd < 0) {
> +				if (errno != ENOENT) {
> +					RTE_LOG(ERR, EAL, "Cannot open %s:
> %s\n", filename,
> +							strerror(errno));
> +					return -1;
> +				}
> +				return 0;
> +			}
> +			/* noiommu group found */
>  		}
> 
>  		/* if the fd is valid, create a new group for it */ @@ -660,14
> +746,21 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
>  	}
> 
>  	/*
> -	 * set up DMA mappings for container
> +	 * pick an IOMMU type and set up DMA mappings for container
>  	 *
>  	 * needs to be done only once, only when at least one group is
> assigned to
>  	 * a container and only in primary process
>  	 */
>  	if (internal_config.process_type == RTE_PROC_PRIMARY &&
>  			vfio_cfg.vfio_container_has_dma == 0) {
> -		ret =
> pci_vfio_setup_dma_maps(vfio_cfg.vfio_container_fd);
> +		/* select an IOMMU type which we will be using */
> +		const struct vfio_iommu_type *t =
> +
> 	pci_vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
> +		if (!t) {
> +			RTE_LOG(ERR, EAL, "  %s failed to select IOMMU
> type\n", pci_addr);
> +			return -1;
> +		}
> +		ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
>  		if (ret) {
>  			RTE_LOG(ERR, EAL, "  %s DMA remapping failed, "
>  					"error %i (%s)\n", pci_addr, errno,
> strerror(errno)); @@ -887,35 +980,41 @@ pci_vfio_enable(void)  {
>  	/* initialize group list */
>  	int i;
> -	int module_vfio_type1;
> +	int vfio_available;
> 
>  	for (i = 0; i < VFIO_MAX_GROUPS; i++) {
>  		vfio_cfg.vfio_groups[i].fd = -1;
>  		vfio_cfg.vfio_groups[i].group_no = -1;
>  	}
> 
> -	module_vfio_type1 = rte_eal_check_module("vfio_iommu_type1");
> +	/* inform the user that we are probing for VFIO */
> +	RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
> +
> +	/* check if vfio-pci module is loaded */
> +	vfio_available = rte_eal_check_module("vfio_pci");
> 
>  	/* return error directly */
> -	if (module_vfio_type1 == -1) {
> +	if (vfio_available == -1) {
>  		RTE_LOG(INFO, EAL, "Could not get loaded module
> details!\n");
>  		return -1;
>  	}
> 
>  	/* return 0 if VFIO modules not loaded */
> -	if (module_vfio_type1 == 0) {
> -		RTE_LOG(INFO, EAL, "VFIO modules not all loaded, "
> -			"skip VFIO support...\n");
> +	if (vfio_available == 0) {
> +		RTE_LOG(INFO, EAL, "VFIO modules not loaded, "
> +			"skipping VFIO support...\n");
>  		return 0;
>  	}
> 
>  	vfio_cfg.vfio_container_fd = pci_vfio_get_container_fd();
> 
>  	/* check if we have VFIO driver enabled */
> -	if (vfio_cfg.vfio_container_fd != -1)
> +	if (vfio_cfg.vfio_container_fd != -1) {
> +		RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
>  		vfio_cfg.vfio_enabled = 1;
> -	else
> +	} else {
>  		RTE_LOG(NOTICE, EAL, "VFIO support could not be
> initialized\n");
> +	}
> 
>  	return 0;
>  }
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h
> b/lib/librte_eal/linuxapp/eal/eal_vfio.h
> index 72ec3f6..638ee31 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
> @@ -52,6 +52,11 @@
>  #define RTE_PCI_MSIX_FLAGS_QSIZE  PCI_MSIX_FLAGS_QSIZE  #endif
> 
> +/* older kernels may not have no-IOMMU mode */ #ifndef
> +VFIO_NOIOMMU_IOMMU #define VFIO_NOIOMMU_IOMMU 8 #endif
> +
>  #define VFIO_PRESENT
>  #endif /* kernel version */
>  #endif /* RTE_EAL_VFIO */
> --
> 2.5.0

  reply	other threads:[~2016-01-27 14:24 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-21 20:16 [dpdk-dev] [PATCH] " Anatoly Burakov
2016-01-13 12:36 ` [dpdk-dev] [PATCH v2] " Anatoly Burakov
2016-01-13 16:45   ` Stephen Hemminger
2016-01-14  9:50     ` Burakov, Anatoly
2016-01-27  9:05   ` Thomas Monjalon
2016-01-27 10:08     ` Burakov, Anatoly
2016-01-27 10:12       ` Thomas Monjalon
2016-01-27 10:24         ` David Marchand
2016-01-27 10:29           ` Burakov, Anatoly
2016-01-27 14:04   ` [dpdk-dev] [PATCH v3] " Anatoly Burakov
2016-01-27 14:23     ` Burakov, Anatoly [this message]
2016-01-27 14:32     ` [dpdk-dev] [PATCH v4] " Anatoly Burakov
2016-01-27 15:50       ` Thomas Monjalon
2016-01-27 16:01         ` Burakov, Anatoly
2016-01-27 16:30           ` Burakov, Anatoly
2016-01-27 16:50       ` [dpdk-dev] [PATCH v5] " Anatoly Burakov
2016-01-27 17:07         ` Thomas Monjalon
2016-01-28 10:03           ` Burakov, Anatoly
2016-01-28 13:27             ` Thomas Monjalon
2016-01-28 11:57         ` [dpdk-dev] [PATCH v6] " Anatoly Burakov
2016-01-28 13:58           ` Thomas Monjalon
2016-01-28 14:16             ` Burakov, Anatoly
2016-01-28 14:40               ` Thomas Monjalon
2016-01-28 15:00                 ` Burakov, Anatoly
2016-01-28 16:55           ` Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=C6ECDF3AB251BE4894318F4E45123697820C5098@IRSMSX109.ger.corp.intel.com \
    --to=anatoly.burakov@intel.com \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).