[S390] pgtables: Fix race in enable_sie vs. page table ops
The current enable_sie code sets the mm->context.pgstes bit to tell
dup_mm that the new mm should have extended page tables. This bit is also
used by the s390 specific page table primitives to decide about the page
table layout - which means context.pgstes has two meanings. This can cause
any kind of bugs. For example - e.g. shrink_zone can call
ptep_clear_flush_young while enable_sie is running. ptep_clear_flush_young
will test for context.pgstes. Since enable_sie changed that value of the old
struct mm without changing the page table layout ptep_clear_flush_young will
do the wrong thing.
The solution is to split pgstes into two bits
- one for the allocation
- one for the current state
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 3d98ba8..ef3635b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -169,7 +169,7 @@
unsigned long *table;
unsigned long bits;
- bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
+ bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
spin_lock(&mm->page_table_lock);
page = NULL;
if (!list_empty(&mm->context.pgtable_list)) {
@@ -186,7 +186,7 @@
pgtable_page_ctor(page);
page->flags &= ~FRAG_MASK;
table = (unsigned long *) page_to_phys(page);
- if (mm->context.pgstes)
+ if (mm->context.has_pgste)
clear_table_pgstes(table);
else
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
@@ -210,7 +210,7 @@
struct page *page;
unsigned long bits;
- bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
+ bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock(&mm->page_table_lock);
@@ -257,7 +257,7 @@
struct mm_struct *mm, *old_mm;
/* Do we have pgstes? if yes, we are done */
- if (tsk->mm->context.pgstes)
+ if (tsk->mm->context.has_pgste)
return 0;
/* lets check if we are allowed to replace the mm */
@@ -269,14 +269,14 @@
}
task_unlock(tsk);
- /* we copy the mm with pgstes enabled */
- tsk->mm->context.pgstes = 1;
+ /* we copy the mm and let dup_mm create the page tables with_pgstes */
+ tsk->mm->context.alloc_pgste = 1;
mm = dup_mm(tsk);
- tsk->mm->context.pgstes = 0;
+ tsk->mm->context.alloc_pgste = 0;
if (!mm)
return -ENOMEM;
- /* Now lets check again if somebody attached ptrace etc */
+ /* Now lets check again if something happened */
task_lock(tsk);
if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {