[PATCH] Optimize D-cache alias handling on fork

Virtually indexed, physically tagged cache architectures can get away
without cache flushing when forking.  This patch adds a new cache
flushing function flush_cache_dup_mm(struct mm_struct *) which for the
moment I've implemented to do the same thing as flush_cache_mm() on all
architectures except on MIPS where it's a no-op.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 53245c4..73e794f 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -179,10 +179,21 @@
 	lines associated with 'mm'.
 
 	This interface is used to handle whole address space
-	page table operations such as what happens during
-	fork, exit, and exec.
+	page table operations such as what happens during exit and exec.
 
-2) void flush_cache_range(struct vm_area_struct *vma,
+2) void flush_cache_dup_mm(struct mm_struct *mm)
+
+	This interface flushes an entire user address space from
+	the caches.  That is, after running, there will be no cache
+	lines associated with 'mm'.
+
+	This interface is used to handle whole address space
+	page table operations such as what happens during fork.
+
+	This interface is separate from flush_cache_mm to allow some
+	optimizations for VIPT caches.
+
+3) void flush_cache_range(struct vm_area_struct *vma,
 			  unsigned long start, unsigned long end)
 
 	Here we are flushing a specific range of (user) virtual
@@ -199,7 +210,7 @@
 	call flush_cache_page (see below) for each entry which may be
 	modified.
 
-3) void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
+4) void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
 
 	This time we need to remove a PAGE_SIZE sized range
 	from the cache.  The 'vma' is the backing structure used by
@@ -220,7 +231,7 @@
 
 	This is used primarily during fault processing.
 
-4) void flush_cache_kmaps(void)
+5) void flush_cache_kmaps(void)
 
 	This routine need only be implemented if the platform utilizes
 	highmem.  It will be called right before all of the kmaps
@@ -232,7 +243,7 @@
 
 	This routing should be implemented in asm/highmem.h
 
-5) void flush_cache_vmap(unsigned long start, unsigned long end)
+6) void flush_cache_vmap(unsigned long start, unsigned long end)
    void flush_cache_vunmap(unsigned long start, unsigned long end)
 
 	Here in these two interfaces we are flushing a specific range
diff --git a/include/asm-alpha/cacheflush.h b/include/asm-alpha/cacheflush.h
index 805640b4..b686cc7 100644
--- a/include/asm-alpha/cacheflush.h
+++ b/include/asm-alpha/cacheflush.h
@@ -6,6 +6,7 @@
 /* Caches aren't brain-dead on the Alpha. */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-arm/cacheflush.h b/include/asm-arm/cacheflush.h
index f084564..378a3a2 100644
--- a/include/asm-arm/cacheflush.h
+++ b/include/asm-arm/cacheflush.h
@@ -319,6 +319,8 @@
 				unsigned long len, int write);
 #endif
 
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+
 /*
  * flush_cache_user_range is used when we want to ensure that the
  * Harvard caches are synchronised for the user space address range.
diff --git a/include/asm-arm26/cacheflush.h b/include/asm-arm26/cacheflush.h
index 9c1b9c7..14ae15b 100644
--- a/include/asm-arm26/cacheflush.h
+++ b/include/asm-arm26/cacheflush.h
@@ -22,6 +22,7 @@
 
 #define flush_cache_all()                       do { } while (0)
 #define flush_cache_mm(mm)                      do { } while (0)
+#define flush_cache_dup_mm(mm)                  do { } while (0)
 #define flush_cache_range(vma,start,end)        do { } while (0)
 #define flush_cache_page(vma,vmaddr,pfn)        do { } while (0)
 #define flush_cache_vmap(start, end)		do { } while (0)
diff --git a/include/asm-avr32/cacheflush.h b/include/asm-avr32/cacheflush.h
index f1bf170..dfaaa88 100644
--- a/include/asm-avr32/cacheflush.h
+++ b/include/asm-avr32/cacheflush.h
@@ -87,6 +87,7 @@
  */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_cache_vmap(start, end)		do { } while (0)
diff --git a/include/asm-cris/cacheflush.h b/include/asm-cris/cacheflush.h
index 72cc71d..01af2de 100644
--- a/include/asm-cris/cacheflush.h
+++ b/include/asm-cris/cacheflush.h
@@ -9,6 +9,7 @@
  */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-frv/cacheflush.h b/include/asm-frv/cacheflush.h
index eaa5826..0250040 100644
--- a/include/asm-frv/cacheflush.h
+++ b/include/asm-frv/cacheflush.h
@@ -20,6 +20,7 @@
  */
 #define flush_cache_all()			do {} while(0)
 #define flush_cache_mm(mm)			do {} while(0)
+#define flush_cache_dup_mm(mm)			do {} while(0)
 #define flush_cache_range(mm, start, end)	do {} while(0)
 #define flush_cache_page(vma, vmaddr, pfn)	do {} while(0)
 #define flush_cache_vmap(start, end)		do {} while(0)
diff --git a/include/asm-h8300/cacheflush.h b/include/asm-h8300/cacheflush.h
index 1e4d95b..71210d1 100644
--- a/include/asm-h8300/cacheflush.h
+++ b/include/asm-h8300/cacheflush.h
@@ -12,6 +12,7 @@
 
 #define flush_cache_all()
 #define	flush_cache_mm(mm)
+#define	flush_cache_dup_mm(mm)		do { } while (0)
 #define	flush_cache_range(vma,a,b)
 #define	flush_cache_page(vma,p,pfn)
 #define	flush_dcache_page(page)
diff --git a/include/asm-i386/cacheflush.h b/include/asm-i386/cacheflush.h
index 7199f7b..74e03c8 100644
--- a/include/asm-i386/cacheflush.h
+++ b/include/asm-i386/cacheflush.h
@@ -7,6 +7,7 @@
 /* Caches aren't brain-dead on the intel. */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-ia64/cacheflush.h b/include/asm-ia64/cacheflush.h
index f2dacb4..4906916 100644
--- a/include/asm-ia64/cacheflush.h
+++ b/include/asm-ia64/cacheflush.h
@@ -18,6 +18,7 @@
 
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_icache_page(vma,page)		do { } while (0)
diff --git a/include/asm-m32r/cacheflush.h b/include/asm-m32r/cacheflush.h
index 8b261b4..56961a9 100644
--- a/include/asm-m32r/cacheflush.h
+++ b/include/asm-m32r/cacheflush.h
@@ -9,6 +9,7 @@
 #if defined(CONFIG_CHIP_M32700) || defined(CONFIG_CHIP_OPSP) || defined(CONFIG_CHIP_M32104)
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
@@ -29,6 +30,7 @@
 #elif defined(CONFIG_CHIP_M32102)
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
@@ -41,6 +43,7 @@
 #else
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-m68k/cacheflush.h b/include/asm-m68k/cacheflush.h
index 24d3ff4..16bf375 100644
--- a/include/asm-m68k/cacheflush.h
+++ b/include/asm-m68k/cacheflush.h
@@ -89,6 +89,8 @@
 		__flush_cache_030();
 }
 
+#define flush_cache_dup_mm(mm)			flush_cache_mm(mm)
+
 /* flush_cache_range/flush_cache_page must be macros to avoid
    a dependency on linux/mm.h, which includes this file... */
 static inline void flush_cache_range(struct vm_area_struct *vma,
diff --git a/include/asm-m68knommu/cacheflush.h b/include/asm-m68knommu/cacheflush.h
index c3aadf3..163dcb1 100644
--- a/include/asm-m68knommu/cacheflush.h
+++ b/include/asm-m68knommu/cacheflush.h
@@ -8,6 +8,7 @@
 
 #define flush_cache_all()			__flush_cache_all()
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	__flush_cache_all()
 #define flush_cache_page(vma, vmaddr)		do { } while (0)
 #define flush_dcache_range(start,len)		__flush_cache_all()
diff --git a/include/asm-mips/cacheflush.h b/include/asm-mips/cacheflush.h
index e3c9925..0ddada3 100644
--- a/include/asm-mips/cacheflush.h
+++ b/include/asm-mips/cacheflush.h
@@ -17,6 +17,7 @@
  *
  *  - flush_cache_all() flushes entire cache
  *  - flush_cache_mm(mm) flushes the specified mm context's cache lines
+ *  - flush_cache_dup_mm(mm) handles cache flushing when forking
  *  - flush_cache_page(mm, vmaddr, pfn) flushes a single page
  *  - flush_cache_range(vma, start, end) flushes a range of pages
  *  - flush_icache_range(start, end) flush a range of instructions
@@ -31,6 +32,7 @@
 extern void (*flush_cache_all)(void);
 extern void (*__flush_cache_all)(void);
 extern void (*flush_cache_mm)(struct mm_struct *mm);
+#define flush_cache_dup_mm(mm)	do { (void) (mm); } while (0)
 extern void (*flush_cache_range)(struct vm_area_struct *vma,
 	unsigned long start, unsigned long end);
 extern void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn);
diff --git a/include/asm-parisc/cacheflush.h b/include/asm-parisc/cacheflush.h
index 2bc41f2..aedb051 100644
--- a/include/asm-parisc/cacheflush.h
+++ b/include/asm-parisc/cacheflush.h
@@ -15,6 +15,8 @@
 #define flush_cache_mm(mm) flush_cache_all_local()
 #endif
 
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+
 #define flush_kernel_dcache_range(start,size) \
 	flush_kernel_dcache_range_asm((start), (start)+(size));
 
diff --git a/include/asm-powerpc/cacheflush.h b/include/asm-powerpc/cacheflush.h
index 8a740c8..08e93e7 100644
--- a/include/asm-powerpc/cacheflush.h
+++ b/include/asm-powerpc/cacheflush.h
@@ -18,6 +18,7 @@
  */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_icache_page(vma, page)		do { } while (0)
diff --git a/include/asm-s390/cacheflush.h b/include/asm-s390/cacheflush.h
index e399a8b..f7cade8 100644
--- a/include/asm-s390/cacheflush.h
+++ b/include/asm-s390/cacheflush.h
@@ -7,6 +7,7 @@
 /* Caches aren't brain-dead on the s390. */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-sh/cpu-sh2/cacheflush.h b/include/asm-sh/cpu-sh2/cacheflush.h
index f556fa8..2979efb 100644
--- a/include/asm-sh/cpu-sh2/cacheflush.h
+++ b/include/asm-sh/cpu-sh2/cacheflush.h
@@ -15,6 +15,7 @@
  *
  *  - flush_cache_all() flushes entire cache
  *  - flush_cache_mm(mm) flushes the specified mm context's cache lines
+ *  - flush_cache_dup_mm(mm) handles cache flushing when forking
  *  - flush_cache_page(mm, vmaddr, pfn) flushes a single page
  *  - flush_cache_range(vma, start, end) flushes a range of pages
  *
@@ -27,6 +28,7 @@
  */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-sh/cpu-sh3/cacheflush.h b/include/asm-sh/cpu-sh3/cacheflush.h
index 03fde97..f70d8ef7 100644
--- a/include/asm-sh/cpu-sh3/cacheflush.h
+++ b/include/asm-sh/cpu-sh3/cacheflush.h
@@ -15,6 +15,7 @@
  *
  *  - flush_cache_all() flushes entire cache
  *  - flush_cache_mm(mm) flushes the specified mm context's cache lines
+ *  - flush_cache_dup_mm(mm) handles cache flushing when forking
  *  - flush_cache_page(mm, vmaddr, pfn) flushes a single page
  *  - flush_cache_range(vma, start, end) flushes a range of pages
  *
@@ -39,6 +40,7 @@
 
 void flush_cache_all(void);
 void flush_cache_mm(struct mm_struct *mm);
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
                               unsigned long end);
 void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn);
@@ -48,6 +50,7 @@
 #else
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-sh/cpu-sh4/cacheflush.h b/include/asm-sh/cpu-sh4/cacheflush.h
index 515fd57..b01a10f 100644
--- a/include/asm-sh/cpu-sh4/cacheflush.h
+++ b/include/asm-sh/cpu-sh4/cacheflush.h
@@ -18,6 +18,7 @@
  */
 void flush_cache_all(void);
 void flush_cache_mm(struct mm_struct *mm);
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end);
 void flush_cache_page(struct vm_area_struct *vma, unsigned long addr,
diff --git a/include/asm-sh64/cacheflush.h b/include/asm-sh64/cacheflush.h
index 55f71aa..1e53a47 100644
--- a/include/asm-sh64/cacheflush.h
+++ b/include/asm-sh64/cacheflush.h
@@ -21,6 +21,8 @@
 				    struct page *page, unsigned long addr,
 				    int len);
 
+#define flush_cache_dup_mm(mm)	flush_cache_mm(mm)
+
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
diff --git a/include/asm-sparc/cacheflush.h b/include/asm-sparc/cacheflush.h
index fc632f8..68ac109 100644
--- a/include/asm-sparc/cacheflush.h
+++ b/include/asm-sparc/cacheflush.h
@@ -48,6 +48,7 @@
 
 #define flush_cache_all() BTFIXUP_CALL(flush_cache_all)()
 #define flush_cache_mm(mm) BTFIXUP_CALL(flush_cache_mm)(mm)
+#define flush_cache_dup_mm(mm) BTFIXUP_CALL(flush_cache_mm)(mm)
 #define flush_cache_range(vma,start,end) BTFIXUP_CALL(flush_cache_range)(vma,start,end)
 #define flush_cache_page(vma,addr,pfn) BTFIXUP_CALL(flush_cache_page)(vma,addr)
 #define flush_icache_range(start, end)		do { } while (0)
diff --git a/include/asm-sparc64/cacheflush.h b/include/asm-sparc64/cacheflush.h
index 745d1ab..122e405 100644
--- a/include/asm-sparc64/cacheflush.h
+++ b/include/asm-sparc64/cacheflush.h
@@ -12,6 +12,7 @@
 /* These are the same regardless of whether this is an SMP kernel or not. */
 #define flush_cache_mm(__mm) \
 	do { if ((__mm) == current->mm) flushw_user(); } while(0)
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
 #define flush_cache_range(vma, start, end) \
 	flush_cache_mm((vma)->vm_mm)
 #define flush_cache_page(vma, page, pfn) \
diff --git a/include/asm-v850/cacheflush.h b/include/asm-v850/cacheflush.h
index e1a87f8..9ece05a 100644
--- a/include/asm-v850/cacheflush.h
+++ b/include/asm-v850/cacheflush.h
@@ -24,6 +24,7 @@
    systems with MMUs, so we don't need them.  */
 #define flush_cache_all()			((void)0)
 #define flush_cache_mm(mm)			((void)0)
+#define flush_cache_dup_mm(mm)			((void)0)
 #define flush_cache_range(vma, start, end)	((void)0)
 #define flush_cache_page(vma, vmaddr, pfn)	((void)0)
 #define flush_dcache_page(page)			((void)0)
diff --git a/include/asm-x86_64/cacheflush.h b/include/asm-x86_64/cacheflush.h
index d32f7f5..ab1cb5c 100644
--- a/include/asm-x86_64/cacheflush.h
+++ b/include/asm-x86_64/cacheflush.h
@@ -7,6 +7,7 @@
 /* Caches aren't brain-dead on the intel. */
 #define flush_cache_all()			do { } while (0)
 #define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
 #define flush_cache_range(vma, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
 #define flush_dcache_page(page)			do { } while (0)
diff --git a/include/asm-xtensa/cacheflush.h b/include/asm-xtensa/cacheflush.h
index 337765b..22ef901 100644
--- a/include/asm-xtensa/cacheflush.h
+++ b/include/asm-xtensa/cacheflush.h
@@ -75,6 +75,7 @@
 
 #define flush_cache_all()		__flush_invalidate_cache_all();
 #define flush_cache_mm(mm)		__flush_invalidate_cache_all();
+#define flush_cache_dup_mm(mm)		__flush_invalidate_cache_all();
 
 #define flush_cache_vmap(start,end)	__flush_invalidate_cache_all();
 #define flush_cache_vunmap(start,end)	__flush_invalidate_cache_all();
@@ -88,6 +89,7 @@
 
 #define flush_cache_all()				do { } while (0)
 #define flush_cache_mm(mm)				do { } while (0)
+#define flush_cache_dup_mm(mm)				do { } while (0)
 
 #define flush_cache_vmap(start,end)			do { } while (0)
 #define flush_cache_vunmap(start,end)			do { } while (0)
diff --git a/kernel/fork.c b/kernel/fork.c
index d16c566..fc723e5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -203,7 +203,7 @@
 	struct mempolicy *pol;
 
 	down_write(&oldmm->mmap_sem);
-	flush_cache_mm(oldmm);
+	flush_cache_dup_mm(oldmm);
 	/*
 	 * Not linked in yet - no deadlock potential:
 	 */