Discussion:
[PATCH 1/5] vfio/iommu_type1: support for platform bus devices on ARM
Antonios Motakis
2014-10-13 13:09:04 UTC
This allows the VFIO_IOMMU_TYPE1 driver to be used with platform
devices on ARM. The driver can then be used with the Exynos SMMU or
ARM SMMU drivers.

Signed-off-by: Antonios Motakis <a.motakis-lrHrjnjw1UfHK3s98zE1ajGjJy/***@public.gmane.org>
---
drivers/vfio/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index d8c5763..a0abe04 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -16,7 +16,7 @@ config VFIO_SPAPR_EEH
menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
- select VFIO_IOMMU_TYPE1 if X86
+ select VFIO_IOMMU_TYPE1 if X86 || ARM
select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
select ANON_INODES
--
2.1.1
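For context, a .config fragment exercising this change might look as
follows (the particular SMMU driver chosen here is just an example of
a supported IOMMU):

    CONFIG_ARM=y
    CONFIG_ARM_SMMU=y
    CONFIG_VFIO=y
    # now selected automatically on ARM by the hunk above:
    CONFIG_VFIO_IOMMU_TYPE1=y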
Antonios Motakis
2014-10-13 13:09:05 UTC
We introduce the VFIO_DMA_MAP_FLAG_NOEXEC flag to the VFIO dma map call,
and expose its availability via the capability VFIO_DMA_NOEXEC_IOMMU.
This way the user can control whether the XN flag will be set on the
requested mappings. The IOMMU_NOEXEC flag needs to be available for all
the IOMMUs of the container used.

Signed-off-by: Antonios Motakis <a.motakis-lrHrjnjw1UfHK3s98zE1ajGjJy/***@public.gmane.org>
---
include/uapi/linux/vfio.h | 2 ++
1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 6612974..111b5e8 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -29,6 +29,7 @@
* capability is subject to change as groups are added or removed.
*/
#define VFIO_DMA_CC_IOMMU 4
+#define VFIO_DMA_NOEXEC_IOMMU 5

/* Check if EEH is supported */
#define VFIO_EEH 5
@@ -401,6 +402,7 @@ struct vfio_iommu_type1_dma_map {
__u32 flags;
#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
+#define VFIO_DMA_MAP_FLAG_NOEXEC (1 << 2) /* not executable from device */
__u64 vaddr; /* Process virtual address */
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
--
2.1.1
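For illustration, a minimal userspace sketch of the intended use; the
container setup and the buffer are assumptions and are not shown:

    #include <linux/vfio.h>
    #include <sys/ioctl.h>

    /* container: an open /dev/vfio/vfio fd with a group attached and
     * VFIO_TYPE1_IOMMU set; buf: a page-aligned buffer (assumed) */
    int map_maybe_noexec(int container, void *buf, __u64 iova, __u64 size)
    {
            struct vfio_iommu_type1_dma_map map = {
                    .argsz = sizeof(map),
                    .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
                    .vaddr = (__u64)(unsigned long)buf,
                    .iova  = iova,
                    .size  = size,
            };

            /* NOEXEC is only usable if every IOMMU behind the
             * container reports the capability */
            if (ioctl(container, VFIO_CHECK_EXTENSION, VFIO_DMA_NOEXEC_IOMMU) == 1)
                    map.flags |= VFIO_DMA_MAP_FLAG_NOEXEC;

            return ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
    }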
Alex Williamson
2014-10-20 21:29:43 UTC
Post by Antonios Motakis
We introduce the VFIO_DMA_MAP_FLAG_NOEXEC flag to the VFIO dma map call,
and expose its availability via the capability VFIO_DMA_NOEXEC_IOMMU.
This way the user can control whether the XN flag will be set on the
requested mappings. The IOMMU_NOEXEC flag needs to be available for all
the IOMMUs of the container used.
---
include/uapi/linux/vfio.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 6612974..111b5e8 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -29,6 +29,7 @@
* capability is subject to change as groups are added or removed.
*/
#define VFIO_DMA_CC_IOMMU 4
+#define VFIO_DMA_NOEXEC_IOMMU 5
/* Check if EEH is supported */
#define VFIO_EEH 5
^^
5 is still already in use. Feel free to convert these to an enum so we
stop making this mistake.
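Something along these lines (a sketch; auto-numbering would make such
a collision impossible):

    enum {
            VFIO_TYPE1_IOMMU = 1,
            VFIO_SPAPR_TCE_IOMMU,
            VFIO_TYPE1v2_IOMMU,
            VFIO_DMA_CC_IOMMU,
            VFIO_EEH,
            VFIO_DMA_NOEXEC_IOMMU, /* 6, no longer clashing with VFIO_EEH */
    };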
Post by Antonios Motakis
@@ -401,6 +402,7 @@ struct vfio_iommu_type1_dma_map {
__u32 flags;
#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
+#define VFIO_DMA_MAP_FLAG_NOEXEC (1 << 2) /* not executable from device */
__u64 vaddr; /* Process virtual address */
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
Antonios Motakis
2014-10-21 12:17:48 UTC
On Mon, Oct 20, 2014 at 11:29 PM, Alex Williamson
Post by Alex Williamson
Post by Antonios Motakis
We introduce the VFIO_DMA_MAP_FLAG_NOEXEC flag to the VFIO dma map call,
and expose its availability via the capability VFIO_DMA_NOEXEC_IOMMU.
This way the user can control whether the XN flag will be set on the
requested mappings. The IOMMU_NOEXEC flag needs to be available for all
the IOMMUs of the container used.
---
include/uapi/linux/vfio.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 6612974..111b5e8 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -29,6 +29,7 @@
* capability is subject to change as groups are added or removed.
*/
#define VFIO_DMA_CC_IOMMU 4
+#define VFIO_DMA_NOEXEC_IOMMU 5
/* Check if EEH is supported */
#define VFIO_EEH 5
^^
5 is still already in use. Feel free to convert these to an enum so we
stop making this mistake.
Oops :) will do.
Post by Alex Williamson
Post by Antonios Motakis
@@ -401,6 +402,7 @@ struct vfio_iommu_type1_dma_map {
__u32 flags;
#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
+#define VFIO_DMA_MAP_FLAG_NOEXEC (1 << 2) /* not executable from device */
__u64 vaddr; /* Process virtual address */
__u64 iova; /* IO virtual address */
__u64 size; /* Size of mapping (bytes) */
--
Antonios Motakis
Virtual Open Systems
Andy Lutomirski
2014-10-20 21:37:17 UTC
On Mon, Oct 13, 2014 at 6:09 AM, Antonios Motakis
Post by Antonios Motakis
We introduce the VFIO_DMA_MAP_FLAG_NOEXEC flag to the VFIO dma map call,
and expose its availability via the capability VFIO_DMA_NOEXEC_IOMMU.
This way the user can control whether the XN flag will be set on the
requested mappings. The IOMMU_NOEXEC flag needs to be available for all
the IOMMUs of the container used.
Since you sent this to the linux-api list, I'll bite: what's the XN
flag? I know what PROT_EXEC does when you mmap something, and I
presume that vfio is mmappable, but I don't actually have any clue
what this patch does.

I assume that this does not have anything to do with a non-CPU DMA
master executing code in main memory, because that makes rather little
sense. (Or maybe it really does, in which case: weird.)

--Andy
Antonios Motakis
2014-10-21 12:23:00 UTC
Post by Andy Lutomirski
On Mon, Oct 13, 2014 at 6:09 AM, Antonios Motakis
Post by Antonios Motakis
We introduce the VFIO_DMA_MAP_FLAG_NOEXEC flag to the VFIO dma map call,
and expose its availability via the capability VFIO_DMA_NOEXEC_IOMMU.
This way the user can control whether the XN flag will be set on the
requested mappings. The IOMMU_NOEXEC flag needs to be available for all
the IOMMUs of the container used.
Since you sent this to the linux-api list, I'll bite: what's the XN
flag? I know what PROT_EXEC does when you mmap something, and I
presume that vfio is mmappable, but I don't actually have any clue
what this patch does.
I assume that this does not have anything to do with a non-CPU DMA
master executing code in main memory, because that makes rather little
sense. (Or maybe it really does, in which case: weird.)
It does, actually. For example, the ARM PL330 DMA controller fetches
its program (a sequence of DMA instructions) from memory, and it will
respect this flag. It is not code that can be executed on the CPU, of
course, but it is executable on the DMAC.
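So, hypothetically, a userspace PL330 driver would leave only the
buffer holding the DMA program executable; a sketch, with the
vfio_iommu_type1_dma_map setup of prog_map and data_map assumed:

    /* prog_map covers the PL330 program; the DMAC fetches from it,
     * so it must stay executable */
    prog_map.flags = VFIO_DMA_MAP_FLAG_READ;
    ioctl(container, VFIO_IOMMU_MAP_DMA, &prog_map);

    /* data_map covers plain data buffers, which are never fetched
     * as instructions, so XN can be enforced on them */
    data_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE |
                     VFIO_DMA_MAP_FLAG_NOEXEC;
    ioctl(container, VFIO_IOMMU_MAP_DMA, &data_map);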
Post by Andy Lutomirski
--Andy
--
Antonios Motakis
Virtual Open Systems
Antonios Motakis
2014-10-13 13:09:08 UTC
Some IOMMU drivers, such as the ARM SMMU driver, make available the
IOMMU_NOEXEC flag, to set the page tables for a device as XN (execute never).
This affects devices such as the ARM PL330 DMA Controller, which respects
this flag and will refuse to fetch DMA instructions from memory where the
XN flag has been set.

The flag can be used only if all IOMMU domains behind the container support
the IOMMU_NOEXEC flag. Also, if any mappings are created with the flag, any
new domains with devices will have to support it as well.

Signed-off-by: Antonios Motakis <a.motakis-lrHrjnjw1UfHK3s98zE1ajGjJy/***@public.gmane.org>
---
drivers/vfio/vfio_iommu_type1.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 8b4202a..e225e8f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -569,6 +569,12 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
if (map->flags & VFIO_DMA_MAP_FLAG_READ)
prot |= IOMMU_READ;

+ if (map->flags & VFIO_DMA_MAP_FLAG_NOEXEC) {
+ if (!vfio_domains_have_iommu_cap(iommu, IOMMU_CAP_NOEXEC))
+ return -EINVAL;
+ prot |= IOMMU_NOEXEC;
+ }
+
if (!prot || !size || (size | iova | vaddr) & mask)
return -EINVAL;

@@ -662,6 +668,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
dma = rb_entry(n, struct vfio_dma, node);
iova = dma->iova;

+ /*
+ * if any of the mappings to be replayed has the NOEXEC flag
+ * set, then the new iommu domain must support it
+ */
+ if ((dma->prot | IOMMU_NOEXEC) &&
+ !(domain->caps & IOMMU_CAP_NOEXEC))
+ return -EINVAL;
+
while (iova < dma->iova + dma->size) {
phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
size_t size;
@@ -749,6 +763,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
domain->caps |= IOMMU_CAP_CACHE_COHERENCY;

+ if (iommu_capable(bus, IOMMU_CAP_NOEXEC))
+ domain->caps |= IOMMU_CAP_NOEXEC;
+
/*
* Try to match an existing compatible domain. We don't want to
* preclude an IOMMU driver supporting multiple bus_types and being
@@ -900,6 +917,11 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
return 0;
return vfio_domains_have_iommu_cap(iommu,
IOMMU_CAP_CACHE_COHERENCY);
+ case VFIO_DMA_NOEXEC_IOMMU:
+ if (!iommu)
+ return 0;
+ return vfio_domains_have_iommu_cap(iommu,
+ IOMMU_CAP_NOEXEC);
default:
return 0;
}
@@ -923,7 +945,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
} else if (cmd == VFIO_IOMMU_MAP_DMA) {
struct vfio_iommu_type1_dma_map map;
uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
- VFIO_DMA_MAP_FLAG_WRITE;
+ VFIO_DMA_MAP_FLAG_WRITE |
+ VFIO_DMA_MAP_FLAG_NOEXEC;

minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
--
2.1.1
Alex Williamson
2014-10-20 21:13:23 UTC
Post by Antonios Motakis
Some IOMMU drivers, such as the ARM SMMU driver, make available the
IOMMU_NOEXEC flag, to set the page tables for a device as XN (execute never).
This affects devices such as the ARM PL330 DMA Controller, which respects
this flag and will refuse to fetch DMA instructions from memory where the
XN flag has been set.
The flag can be used only if all IOMMU domains behind the container support
the IOMMU_NOEXEC flag. Also, if any mappings are created with the flag, any
new domains with devices will have to support it as well.
---
drivers/vfio/vfio_iommu_type1.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 8b4202a..e225e8f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -569,6 +569,12 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
if (map->flags & VFIO_DMA_MAP_FLAG_READ)
prot |= IOMMU_READ;
+ if (map->flags & VFIO_DMA_MAP_FLAG_NOEXEC) {
+ if (!vfio_domains_have_iommu_cap(iommu, IOMMU_CAP_NOEXEC))
+ return -EINVAL;
+ prot |= IOMMU_NOEXEC;
+ }
+
if (!prot || !size || (size | iova | vaddr) & mask)
return -EINVAL;
I think this test needs to move above adding the NOEXEC flag, otherwise
we now allow mappings without read or write, which is an ABI change.
Post by Antonios Motakis
@@ -662,6 +668,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
dma = rb_entry(n, struct vfio_dma, node);
iova = dma->iova;
+ /*
+ * if any of the mappings to be replayed has the NOEXEC flag
+ * set, then the new iommu domain must support it
+ */
+ if ((dma->prot | IOMMU_NOEXEC) &&
I think you mean

& IOMMU_NOEXEC
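As written, (dma->prot | IOMMU_NOEXEC) is non-zero for every entry, so
the check would trip on all replays. Presumably the intended test is:

    if ((dma->prot & IOMMU_NOEXEC) &&
        !(domain->caps & IOMMU_CAP_NOEXEC))
            return -EINVAL;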
Post by Antonios Motakis
+ !(domain->caps & IOMMU_CAP_NOEXEC))
+ return -EINVAL;
+
In patch 2/5 you stated:

The IOMMU_NOEXEC flag needs to be available for all the IOMMUs
of the container used.

But here you'll create heterogeneous containers so long as there are no
NOEXEC mappings. Is that intentional or a side effect of the above
masking bug?
Post by Antonios Motakis
while (iova < dma->iova + dma->size) {
phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
size_t size;
@@ -749,6 +763,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
domain->caps |= IOMMU_CAP_CACHE_COHERENCY;
+ if (iommu_capable(bus, IOMMU_CAP_NOEXEC))
+ domain->caps |= IOMMU_CAP_NOEXEC;
+
/*
* Try to match an existing compatible domain. We don't want to
* preclude an IOMMU driver supporting multiple bus_types and being
@@ -900,6 +917,11 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
return 0;
return vfio_domains_have_iommu_cap(iommu,
IOMMU_CAP_CACHE_COHERENCY);
+ case VFIO_DMA_NOEXEC_IOMMU:
+ if (!iommu)
+ return 0;
+ return vfio_domains_have_iommu_cap(iommu,
+ IOMMU_CAP_NOEXEC);
default:
return 0;
}
@@ -923,7 +945,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
} else if (cmd == VFIO_IOMMU_MAP_DMA) {
struct vfio_iommu_type1_dma_map map;
uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
- VFIO_DMA_MAP_FLAG_WRITE;
+ VFIO_DMA_MAP_FLAG_WRITE |
+ VFIO_DMA_MAP_FLAG_NOEXEC;
minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
Antonios Motakis
2014-10-21 12:40:03 UTC
On Mon, Oct 20, 2014 at 11:13 PM, Alex Williamson
Post by Alex Williamson
Post by Antonios Motakis
Some IOMMU drivers, such as the ARM SMMU driver, make available the
IOMMU_NOEXEC flag, to set the page tables for a device as XN (execute never).
This affects devices such as the ARM PL330 DMA Controller, which respects
this flag and will refuse to fetch DMA instructions from memory where the
XN flag has been set.
The flag can be used only if all IOMMU domains behind the container support
the IOMMU_NOEXEC flag. Also, if any mappings are created with the flag, any
new domains with devices will have to support it as well.
---
drivers/vfio/vfio_iommu_type1.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 8b4202a..e225e8f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -569,6 +569,12 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
if (map->flags & VFIO_DMA_MAP_FLAG_READ)
prot |= IOMMU_READ;
+ if (map->flags & VFIO_DMA_MAP_FLAG_NOEXEC) {
+ if (!vfio_domains_have_iommu_cap(iommu, IOMMU_CAP_NOEXEC))
+ return -EINVAL;
+ prot |= IOMMU_NOEXEC;
+ }
+
if (!prot || !size || (size | iova | vaddr) & mask)
return -EINVAL;
I think this test needs to move above adding the NOEXEC flag, otherwise
we now allow mappings without read or write, which is an ABI change.
Ack.
Post by Alex Williamson
Post by Antonios Motakis
@@ -662,6 +668,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
dma = rb_entry(n, struct vfio_dma, node);
iova = dma->iova;
+ /*
+ * if any of the mappings to be replayed has the NOEXEC flag
+ * set, then the new iommu domain must support it
+ */
+ if ((dma->prot | IOMMU_NOEXEC) &&
I think you mean
& IOMMU_NOEXEC
Ack.
Post by Alex Williamson
Post by Antonios Motakis
+ !(domain->caps & IOMMU_CAP_NOEXEC))
+ return -EINVAL;
+
The IOMMU_NOEXEC flag needs to be available for all the IOMMUs
of the container used.
But here you'll create heterogeneous containers so long as there are no
NOEXEC mappings. Is that intentional or a side effect of the above
masking bug?
Yeah, my intention was not to stop the user from having heterogeneous
containers, as long as he doesn't care about using the NOEXEC flag. As
soon as the user tries to apply this flag, however, it should be
supported by all the IOMMUs behind the container - otherwise it is not
enforceable.

Do you think we should change this behavior? I think most users will
not care about using this flag, and we should not stop them from
mixing IOMMUs in a container.
Post by Alex Williamson
Post by Antonios Motakis
while (iova < dma->iova + dma->size) {
phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
size_t size;
@@ -749,6 +763,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
domain->caps |= IOMMU_CAP_CACHE_COHERENCY;
+ if (iommu_capable(bus, IOMMU_CAP_NOEXEC))
+ domain->caps |= IOMMU_CAP_NOEXEC;
+
/*
* Try to match an existing compatible domain. We don't want to
* preclude an IOMMU driver supporting multiple bus_types and being
@@ -900,6 +917,11 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
return 0;
return vfio_domains_have_iommu_cap(iommu,
IOMMU_CAP_CACHE_COHERENCY);
+ case VFIO_DMA_NOEXEC_IOMMU:
+ if (!iommu)
+ return 0;
+ return vfio_domains_have_iommu_cap(iommu,
+ IOMMU_CAP_NOEXEC);
default:
return 0;
}
@@ -923,7 +945,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
} else if (cmd == VFIO_IOMMU_MAP_DMA) {
struct vfio_iommu_type1_dma_map map;
uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
- VFIO_DMA_MAP_FLAG_WRITE;
+ VFIO_DMA_MAP_FLAG_WRITE |
+ VFIO_DMA_MAP_FLAG_NOEXEC;
minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
--
Antonios Motakis
Virtual Open Systems
Alex Williamson
2014-10-21 14:37:54 UTC
Post by Antonios Motakis
On Mon, Oct 20, 2014 at 11:13 PM, Alex Williamson
Post by Alex Williamson
Post by Antonios Motakis
Some IOMMU drivers, such as the ARM SMMU driver, make available the
IOMMU_NOEXEC flag, to set the page tables for a device as XN (execute never).
This affects devices such as the ARM PL330 DMA Controller, which respects
this flag and will refuse to fetch DMA instructions from memory where the
XN flag has been set.
The flag can be used only if all IOMMU domains behind the container support
the IOMMU_NOEXEC flag. Also, if any mappings are created with the flag, any
new domains with devices will have to support it as well.
---
drivers/vfio/vfio_iommu_type1.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 8b4202a..e225e8f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -569,6 +569,12 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
if (map->flags & VFIO_DMA_MAP_FLAG_READ)
prot |= IOMMU_READ;
+ if (map->flags & VFIO_DMA_MAP_FLAG_NOEXEC) {
+ if (!vfio_domains_have_iommu_cap(iommu, IOMMU_CAP_NOEXEC))
+ return -EINVAL;
+ prot |= IOMMU_NOEXEC;
+ }
+
if (!prot || !size || (size | iova | vaddr) & mask)
return -EINVAL;
I think this test needs to move above adding the NOEXEC flag, otherwise
we now allow mappings without read or write, which is an ABI change.
Ack.
Post by Alex Williamson
Post by Antonios Motakis
@@ -662,6 +668,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
dma = rb_entry(n, struct vfio_dma, node);
iova = dma->iova;
+ /*
+ * if any of the mappings to be replayed has the NOEXEC flag
+ * set, then the new iommu domain must support it
+ */
+ if ((dma->prot | IOMMU_NOEXEC) &&
I think you mean
& IOMMU_NOEXEC
Ack.
Post by Alex Williamson
Post by Antonios Motakis
+ !(domain->caps & IOMMU_CAP_NOEXEC))
+ return -EINVAL;
+
The IOMMU_NOEXEC flag needs to be available for all the IOMMUs
of the container used.
But here you'll create heterogeneous containers so long as there are no
NOEXEC mappings. Is that intentional or a side effect of the above
masking bug?
Yeah, my intention was not to stop the user from having heterogeneous
containers, as long as he doesn't care about using the NOEXEC flag. As
soon as the user tries to apply this flag, however, it should be
supported by all the IOMMUs behind the container - otherwise it is not
enforceable.
Do you think we should change this behavior? I think most users will
not care about using this flag, and we should not stop them from
mixing IOMMUs in a container.
I think that's a reasonable way to go, but let's add a comment in uapi
vfio.h describing that expectation. Thanks,

Alex
Post by Antonios Motakis
Post by Alex Williamson
Post by Antonios Motakis
while (iova < dma->iova + dma->size) {
phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
size_t size;
@@ -749,6 +763,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
domain->caps |= IOMMU_CAP_CACHE_COHERENCY;
+ if (iommu_capable(bus, IOMMU_CAP_NOEXEC))
+ domain->caps |= IOMMU_CAP_NOEXEC;
+
/*
* Try to match an existing compatible domain. We don't want to
* preclude an IOMMU driver supporting multiple bus_types and being
@@ -900,6 +917,11 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
return 0;
return vfio_domains_have_iommu_cap(iommu,
IOMMU_CAP_CACHE_COHERENCY);
+ case VFIO_DMA_NOEXEC_IOMMU:
+ if (!iommu)
+ return 0;
+ return vfio_domains_have_iommu_cap(iommu,
+ IOMMU_CAP_NOEXEC);
default:
return 0;
}
@@ -923,7 +945,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
} else if (cmd == VFIO_IOMMU_MAP_DMA) {
struct vfio_iommu_type1_dma_map map;
uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
- VFIO_DMA_MAP_FLAG_WRITE;
+ VFIO_DMA_MAP_FLAG_WRITE |
+ VFIO_DMA_MAP_FLAG_NOEXEC;
minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
Antonios Motakis
2014-10-21 14:41:51 UTC
On Tue, Oct 21, 2014 at 4:37 PM, Alex Williamson
Post by Alex Williamson
Post by Antonios Motakis
On Mon, Oct 20, 2014 at 11:13 PM, Alex Williamson
Post by Alex Williamson
Post by Antonios Motakis
Some IOMMU drivers, such as the ARM SMMU driver, make available the
IOMMU_NOEXEC flag, to set the page tables for a device as XN (execute never).
This affects devices such as the ARM PL330 DMA Controller, which respects
this flag and will refuse to fetch DMA instructions from memory where the
XN flag has been set.
The flag can be used only if all IOMMU domains behind the container support
the IOMMU_NOEXEC flag. Also, if any mappings are created with the flag, any
new domains with devices will have to support it as well.
---
drivers/vfio/vfio_iommu_type1.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 8b4202a..e225e8f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -569,6 +569,12 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
if (map->flags & VFIO_DMA_MAP_FLAG_READ)
prot |= IOMMU_READ;
+ if (map->flags & VFIO_DMA_MAP_FLAG_NOEXEC) {
+ if (!vfio_domains_have_iommu_cap(iommu, IOMMU_CAP_NOEXEC))
+ return -EINVAL;
+ prot |= IOMMU_NOEXEC;
+ }
+
if (!prot || !size || (size | iova | vaddr) & mask)
return -EINVAL;
I think this test needs to move above adding the NOEXEC flag, otherwise
we now allow mappings without read or write, which is an ABI change.
Ack.
Post by Alex Williamson
Post by Antonios Motakis
@@ -662,6 +668,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
dma = rb_entry(n, struct vfio_dma, node);
iova = dma->iova;
+ /*
+ * if any of the mappings to be replayed has the NOEXEC flag
+ * set, then the new iommu domain must support it
+ */
+ if ((dma->prot | IOMMU_NOEXEC) &&
I think you mean
& IOMMU_NOEXEC
Ack.
Post by Alex Williamson
Post by Antonios Motakis
+ !(domain->caps & IOMMU_CAP_NOEXEC))
+ return -EINVAL;
+
The IOMMU_NOEXEC flag needs to be available for all the IOMMUs
of the container used.
But here you'll create heterogeneous containers so long as there are no
NOEXEC mappings. Is that intentional or a side effect of the above
masking bug?
Yeah, my intention was not to stop the user from having heterogeneous
containers, as long as he doesn't care about using the NOEXEC flag. As
soon as the user tries to apply this flag, however, it should be
supported by all the IOMMUs behind the container - otherwise it is not
enforceable.
Do you think we should change this behavior? I think most users will
not care about using this flag, and we should not stop them from
mixing IOMMUs in a container.
I think that's a reasonable way to go, but let's add a comment in uapi
vfio.h describing that expectation. Thanks,
Ok, will do.
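Perhaps something along these lines in vfio.h (wording is only a
first stab):

    /*
     * The NOEXEC flag may only be set if all IOMMU domains in the
     * container support IOMMU_CAP_NOEXEC; conversely, once a mapping
     * carries the flag, a group backed by an IOMMU without the
     * capability can no longer be attached to the container.
     */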
Post by Alex Williamson
Alex
Post by Antonios Motakis
Post by Alex Williamson
Post by Antonios Motakis
while (iova < dma->iova + dma->size) {
phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
size_t size;
@@ -749,6 +763,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
domain->caps |= IOMMU_CAP_CACHE_COHERENCY;
+ if (iommu_capable(bus, IOMMU_CAP_NOEXEC))
+ domain->caps |= IOMMU_CAP_NOEXEC;
+
/*
* Try to match an existing compatible domain. We don't want to
* preclude an IOMMU driver supporting multiple bus_types and being
@@ -900,6 +917,11 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
return 0;
return vfio_domains_have_iommu_cap(iommu,
IOMMU_CAP_CACHE_COHERENCY);
+ case VFIO_DMA_NOEXEC_IOMMU:
+ if (!iommu)
+ return 0;
+ return vfio_domains_have_iommu_cap(iommu,
+ IOMMU_CAP_NOEXEC);
default:
return 0;
}
@@ -923,7 +945,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
} else if (cmd == VFIO_IOMMU_MAP_DMA) {
struct vfio_iommu_type1_dma_map map;
uint32_t mask = VFIO_DMA_MAP_FLAG_READ |
- VFIO_DMA_MAP_FLAG_WRITE;
+ VFIO_DMA_MAP_FLAG_WRITE |
+ VFIO_DMA_MAP_FLAG_NOEXEC;
minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
--
Antonios Motakis
Virtual Open Systems
Antonios Motakis
2014-10-13 13:09:06 UTC
For each domain it knows about, VFIO_IOMMU_TYPE1 keeps track of a set
of protection flags that it always applies to all mappings in that
domain. This is used for domains that support IOMMU_CAP_CACHE_COHERENCY.

Refactor this slightly: keep track of the fact that a given domain
supports the capability, and apply the IOMMU_CACHE protection flag when
doing the actual DMA mappings.

This will allow us to reuse the mechanism for IOMMU_CAP_NOEXEC, which we
also want to keep track of, but without applying it to all mappings in
domains that support it unless the user explicitly requests it.

Signed-off-by: Antonios Motakis <a.motakis-lrHrjnjw1UfHK3s98zE1ajGjJy/***@public.gmane.org>
---
drivers/vfio/vfio_iommu_type1.c | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 562f686..62a8b4d 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -64,7 +64,7 @@ struct vfio_domain {
struct iommu_domain *domain;
struct list_head next;
struct list_head group_list;
- int prot; /* IOMMU_CACHE */
+ int caps;
};

struct vfio_dma {
@@ -485,7 +485,7 @@ static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova,
for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) {
ret = iommu_map(domain->domain, iova,
(phys_addr_t)pfn << PAGE_SHIFT,
- PAGE_SIZE, prot | domain->prot);
+ PAGE_SIZE, prot);
if (ret)
break;
}
@@ -503,11 +503,16 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
int ret;

list_for_each_entry(d, &iommu->domain_list, next) {
+ int dprot = prot;
+
+ if (d->caps | IOMMU_CAP_CACHE_COHERENCY)
+ dprot |= IOMMU_CACHE;
+
ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
- npage << PAGE_SHIFT, prot | d->prot);
+ npage << PAGE_SHIFT, dprot);
if (ret) {
if (ret != -EBUSY ||
- map_try_harder(d, iova, pfn, npage, prot))
+ map_try_harder(d, iova, pfn, npage, dprot))
goto unwind;
}
}
@@ -620,6 +625,10 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
struct vfio_domain *d;
struct rb_node *n;
int ret;
+ int dprot = 0;
+
+ if (domain->caps | IOMMU_CAP_CACHE_COHERENCY)
+ dprot |= IOMMU_CACHE;

/* Arbitrarily pick the first domain in the list for lookups */
d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
@@ -653,7 +662,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
size += PAGE_SIZE;

ret = iommu_map(domain->domain, iova, phys,
- size, dma->prot | domain->prot);
+ size, dma->prot | dprot);
if (ret)
return ret;

@@ -721,7 +730,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
}

if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
- domain->prot |= IOMMU_CACHE;
+ domain->caps |= IOMMU_CAP_CACHE_COHERENCY;

/*
* Try to match an existing compatible domain. We don't want to
@@ -732,7 +741,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
*/
list_for_each_entry(d, &iommu->domain_list, next) {
if (d->domain->ops == domain->domain->ops &&
- d->prot == domain->prot) {
+ d->caps == domain->caps) {
iommu_detach_group(domain->domain, iommu_group);
if (!iommu_attach_group(d->domain, iommu_group)) {
list_add(&group->next, &d->group_list);
@@ -865,7 +874,7 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)

mutex_lock(&iommu->lock);
list_for_each_entry(domain, &iommu->domain_list, next) {
- if (!(domain->prot & IOMMU_CACHE)) {
+ if (!(domain->caps & IOMMU_CAP_CACHE_COHERENCY)) {
ret = 0;
break;
}
--
2.1.1
Eric Auger
2014-10-22 09:08:58 UTC
Post by Antonios Motakis
For each domain it knows about, VFIO_IOMMU_TYPE1 keeps track of a set
of protection flags that it always applies to all mappings in that
domain. This is used for domains that support IOMMU_CAP_CACHE_COHERENCY.
Refactor this slightly: keep track of the fact that a given domain
supports the capability, and apply the IOMMU_CACHE protection flag when
doing the actual DMA mappings.
This will allow us to reuse the mechanism for IOMMU_CAP_NOEXEC, which we
also want to keep track of, but without applying it to all mappings in
domains that support it unless the user explicitly requests it.
---
drivers/vfio/vfio_iommu_type1.c | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 562f686..62a8b4d 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -64,7 +64,7 @@ struct vfio_domain {
struct iommu_domain *domain;
struct list_head next;
struct list_head group_list;
- int prot; /* IOMMU_CACHE */
+ int caps;
};
struct vfio_dma {
@@ -485,7 +485,7 @@ static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova,
for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) {
ret = iommu_map(domain->domain, iova,
(phys_addr_t)pfn << PAGE_SHIFT,
- PAGE_SIZE, prot | domain->prot);
+ PAGE_SIZE, prot);
if (ret)
break;
}
@@ -503,11 +503,16 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
int ret;
list_for_each_entry(d, &iommu->domain_list, next) {
+ int dprot = prot;
+
+ if (d->caps | IOMMU_CAP_CACHE_COHERENCY)
should be &
Post by Antonios Motakis
+ dprot |= IOMMU_CACHE;
+
ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
- npage << PAGE_SHIFT, prot | d->prot);
+ npage << PAGE_SHIFT, dprot);
if (ret) {
if (ret != -EBUSY ||
- map_try_harder(d, iova, pfn, npage, prot))
+ map_try_harder(d, iova, pfn, npage, dprot))
goto unwind;
}
}
@@ -620,6 +625,10 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
struct vfio_domain *d;
struct rb_node *n;
int ret;
+ int dprot = 0;
+
+ if (domain->caps | IOMMU_CAP_CACHE_COHERENCY)
same to be fixed here.
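i.e. both capability tests presumably want "&" rather than "|":

    if (d->caps & IOMMU_CAP_CACHE_COHERENCY)
            dprot |= IOMMU_CACHE;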

With the three "|" corrections and the "num--" fix in
get_platform_resource, v8 is functional with the Calxeda xgmac QEMU
VFIO device.

Best Regards

Eric
Post by Antonios Motakis
+ dprot |= IOMMU_CACHE;
/* Arbitrarily pick the first domain in the list for lookups */
d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
@@ -653,7 +662,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
size += PAGE_SIZE;
ret = iommu_map(domain->domain, iova, phys,
- size, dma->prot | domain->prot);
+ size, dma->prot | dprot);
if (ret)
return ret;
@@ -721,7 +730,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
}
if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
- domain->prot |= IOMMU_CACHE;
+ domain->caps |= IOMMU_CAP_CACHE_COHERENCY;
/*
* Try to match an existing compatible domain. We don't want to
@@ -732,7 +741,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
*/
list_for_each_entry(d, &iommu->domain_list, next) {
if (d->domain->ops == domain->domain->ops &&
- d->prot == domain->prot) {
+ d->caps == domain->caps) {
iommu_detach_group(domain->domain, iommu_group);
if (!iommu_attach_group(d->domain, iommu_group)) {
list_add(&group->next, &d->group_list);
@@ -865,7 +874,7 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
mutex_lock(&iommu->lock);
list_for_each_entry(domain, &iommu->domain_list, next) {
- if (!(domain->prot & IOMMU_CACHE)) {
+ if (!(domain->caps & IOMMU_CAP_CACHE_COHERENCY)) {
ret = 0;
break;
}
Antonios Motakis
2014-10-13 13:09:07 UTC
Replace the function vfio_domains_have_iommu_cache() with a more
generic function, vfio_domains_have_iommu_cap(), which allows checking
all domains of a vfio_iommu structure for a given cached capability.

Signed-off-by: Antonios Motakis <a.motakis-lrHrjnjw1UfHK3s98zE1ajGjJy/***@public.gmane.org>
---
drivers/vfio/vfio_iommu_type1.c | 37 +++++++++++++++++++------------------
1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 62a8b4d..8b4202a 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -80,6 +80,23 @@ struct vfio_group {
struct list_head next;
};

+static int vfio_domains_have_iommu_cap(struct vfio_iommu *iommu, int cap)
+{
+ struct vfio_domain *domain;
+ int ret = 1;
+
+ mutex_lock(&iommu->lock);
+ list_for_each_entry(domain, &iommu->domain_list, next) {
+ if (!(domain->caps & cap)) {
+ ret = 0;
+ break;
+ }
+ }
+ mutex_unlock(&iommu->lock);
+
+ return ret;
+}
+
/*
* This code handles mapping and unmapping of user data buffers
* into DMA'ble space using the IOMMU
@@ -867,23 +884,6 @@ static void vfio_iommu_type1_release(void *iommu_data)
kfree(iommu);
}

-static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
-{
- struct vfio_domain *domain;
- int ret = 1;
-
- mutex_lock(&iommu->lock);
- list_for_each_entry(domain, &iommu->domain_list, next) {
- if (!(domain->caps & IOMMU_CAP_CACHE_COHERENCY)) {
- ret = 0;
- break;
- }
- }
- mutex_unlock(&iommu->lock);
-
- return ret;
-}
-
static long vfio_iommu_type1_ioctl(void *iommu_data,
unsigned int cmd, unsigned long arg)
{
@@ -898,7 +898,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
case VFIO_DMA_CC_IOMMU:
if (!iommu)
return 0;
- return vfio_domains_have_iommu_cache(iommu);
+ return vfio_domains_have_iommu_cap(iommu,
+ IOMMU_CAP_CACHE_COHERENCY);
default:
return 0;
}
--
2.1.1