Discussion:
[PATCH v12 5/6] arm: KVM: dirty log read write protect support
Mario Smarduch
2014-10-22 22:38:45 UTC
Permalink
This patch adds support for tracking VM dirty pages between dirty log reads. Pages
that have been dirtied since the last log read are write protected again, in
preparation for the next dirty log read. In addition, the ARMv7 dirty log read
function is pushed up to the generic layer.

Signed-off-by: Mario Smarduch <***@samsung.com>
---
arch/arm/kvm/Kconfig | 1 +
arch/arm/kvm/Makefile | 1 +
arch/arm/kvm/arm.c | 2 ++
arch/arm/kvm/mmu.c | 22 ++++++++++++++++++++++
4 files changed, 26 insertions(+)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index a099df4..9a0bd8e 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -24,6 +24,7 @@ config KVM
select KVM_MMIO
select KVM_ARM_HOST
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
+ select KVM_GENERIC_DIRTYLOG
depends on ARM_VIRT_EXT && ARM_LPAE
---help---
Support hosting virtualized guest machines. You will also
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f7057ed..3480897 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -23,3 +23,4 @@ obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+obj-$(CONFIG_KVM_GENERIC_DIRTYLOG) += $(KVM)/dirtylog.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a99e0cd..94bf645 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -737,10 +737,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
}

+#ifdef CONFIG_ARM64
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
return -EINVAL;
}
+#endif

static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
struct kvm_arm_device_addr *dev_addr)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 3b86522..e348386 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -872,6 +872,28 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
}
+
+/**
+ * kvm_mmu_write_protect_pt_masked() - write protect dirty pages set in mask
+ * @kvm: The KVM pointer
+ * @slot: The memory slot associated with mask
+ * @gfn_offset: The gfn offset in memory slot
+ * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
+ * slot to be write protected (presumably non-zero; see __ffs()/__fls())
+ *
+ * Write protects the stage-2 range spanning the lowest through the highest
+ * bit set in mask. Caller must hold kvm->mmu_lock.
+ */
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset, unsigned long mask)
+{
+ phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
+ phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
+ phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
+
+ stage2_wp_range(kvm, start, end);
+}
#endif

static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
--
1.7.9.5
Mario Smarduch
2014-10-22 22:38:46 UTC
Permalink
This patch adds support for handling 2nd stage page faults during migration:
it disables faulting in huge pages and dissolves existing huge pages into page
tables. If migration is canceled, huge pages are used again.

Reviewed-by: Christoffer Dall <christoffer.dall at linaro.org>
Signed-off-by: Mario Smarduch <***@samsung.com>
---
arch/arm/kvm/mmu.c | 47 ++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index e348386..b00dec6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -47,6 +47,15 @@ static phys_addr_t hyp_idmap_vector;
#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
#define kvm_pud_huge(_x) pud_huge(_x)

+static bool kvm_get_logging_state(struct kvm_memory_slot *memslot)
+{
+#ifdef CONFIG_ARM
+ return !!memslot->dirty_bitmap;
+#else
+ return false;
+#endif
+}
+
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
/*
@@ -626,7 +635,8 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
}

static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
- phys_addr_t addr, const pte_t *new_pte, bool iomap)
+ phys_addr_t addr, const pte_t *new_pte, bool iomap,
+ bool logging_active)
{
pmd_t *pmd;
pte_t *pte, old_pte;
@@ -641,6 +651,18 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
return 0;
}

+ /*
+ * While dirty page logging is active, clear the PMD entry of a huge page and
+ * split it into smaller pages, to track dirty memory at page granularity.
+ */
+ if (logging_active && kvm_pmd_huge(*pmd)) {
+ phys_addr_t ipa = pmd_pfn(*pmd) << PAGE_SHIFT;
+
+ pmd_clear(pmd);
+ kvm_tlb_flush_vmid_ipa(kvm, ipa);
+ put_page(virt_to_page(pmd));
+ }
+
/* Create stage-2 page mappings - Level 2 */
if (pmd_none(*pmd)) {
if (!cache)
@@ -693,7 +715,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
- ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
+ ret = stage2_set_pte(kvm, &cache, addr, &pte, true, false);
spin_unlock(&kvm->mmu_lock);
if (ret)
goto out;
@@ -910,6 +932,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct vm_area_struct *vma;
pfn_t pfn;
pgprot_t mem_type = PAGE_S2;
+ bool logging_active = kvm_get_logging_state(memslot);

write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
if (fault_status == FSC_PERM && !write_fault) {
@@ -920,7 +943,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
/* Let's check if we will get back a huge page backed by hugetlbfs */
down_read(&current->mm->mmap_sem);
vma = find_vma_intersection(current->mm, hva, hva + 1);
- if (is_vm_hugetlb_page(vma)) {
+ if (is_vm_hugetlb_page(vma) && !logging_active) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
} else {
@@ -966,7 +989,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
spin_lock(&kvm->mmu_lock);
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
- if (!hugetlb && !force_pte)
+ if (!hugetlb && !force_pte && !logging_active)
hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);

if (hugetlb) {
@@ -986,10 +1009,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
- mem_type == PAGE_S2_DEVICE);
+ mem_type == PAGE_S2_DEVICE,
+ logging_active);
}

-
+ if (write_fault)
+ mark_page_dirty(kvm, gfn);
out_unlock:
spin_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
@@ -1139,7 +1164,15 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
pte_t *pte = (pte_t *)data;

- stage2_set_pte(kvm, NULL, gpa, pte, false);
+ /*
+ * We can always call stage2_set_pte with logging_active == false,
+ * because MMU notifiers will have unmapped a huge PMD before calling
+ * ->change_pte() (which in turn calls kvm_set_spte_hva()) and therefore
+ * stage2_set_pte() never needs to clear out a huge PMD through this
+ * calling path.
+ */
+
+ stage2_set_pte(kvm, NULL, gpa, pte, false, false);
}
--
1.7.9.5
Loading...