userfaultfd: hugetlbfs: userfaultfd_huge_must_wait for hugepmd ranges
Add routine userfaultfd_huge_must_wait which has the same functionality
as the existing userfaultfd_must_wait routine. Only difference is that
new routine must handle page table structure for hugepmd vmas.
Link: http://lkml.kernel.org/r/20161216144821.5183-24-aarcange@redhat.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 5139d05..11b5e65 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -202,6 +202,49 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
return msg;
}
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Same functionality as userfaultfd_must_wait below with modifications for
+ * hugepmd ranges.
+ */
+static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+ unsigned long address,
+ unsigned long flags,
+ unsigned long reason)
+{
+ struct mm_struct *mm = ctx->mm;
+ pte_t *pte;
+ bool ret = true;
+
+ VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
+
+ pte = huge_pte_offset(mm, address);
+ if (!pte)
+ goto out;
+
+ ret = false;
+
+ /*
+ * Lockless access: we're in a wait_event so it's ok if it
+ * changes under us.
+ */
+ if (huge_pte_none(*pte))
+ ret = true;
+ if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP))
+ ret = true;
+out:
+ return ret;
+}
+#else
+static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+ unsigned long address,
+ unsigned long flags,
+ unsigned long reason)
+{
+ return false; /* should never get here */
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
/*
* Verify the pagetables are still not ok after having reigstered into
* the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
@@ -378,8 +421,12 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
set_current_state(blocking_state);
spin_unlock(&ctx->fault_pending_wqh.lock);
- must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
- reason);
+ if (!is_vm_hugetlb_page(vmf->vma))
+ must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
+ reason);
+ else
+ must_wait = userfaultfd_huge_must_wait(ctx, vmf->address,
+ vmf->flags, reason);
up_read(&mm->mmap_sem);
if (likely(must_wait && !ACCESS_ONCE(ctx->released) &&