rcu: Add a TINY_PREEMPT_RCU

Implement a small-memory-footprint uniprocessor-only implementation of
preemptible RCU.  This implementation uses but a single blocked-tasks
list rather than the combinatorial number used per leaf rcu_node by
TREE_PREEMPT_RCU, which reduces memory consumption and greatly simplifies
processing.  This version also takes advantage of uniprocessor execution
to accelerate grace periods in the case where there are no readers.

The general design is otherwise broadly similar to that of TREE_PREEMPT_RCU.

This implementation is a step towards having RCU implementation driven
off of the SMP and PREEMPT kernel configuration variables, which can
happen once this implementation has accumulated sufficient experience.

Removed ACCESS_ONCE() from __rcu_read_unlock() and added barrier() as
suggested by Steve Rostedt in order to avoid the compiler-reordering
issue noted by Mathieu Desnoyers (http://lkml.org/lkml/2010/8/16/183).

As can be seen below, CONFIG_TINY_PREEMPT_RCU represents almost 5Kbyte
savings compared to CONFIG_TREE_PREEMPT_RCU.  Of course, for non-real-time
workloads, CONFIG_TINY_RCU is even better.

	CONFIG_TREE_PREEMPT_RCU

	   text	   data	    bss	    dec	   filename
	     13	      0	      0	     13	   kernel/rcupdate.o
	   6170	    825	     28	   7023	   kernel/rcutree.o
				   ----
				   7026    Total

	CONFIG_TINY_PREEMPT_RCU

	   text	   data	    bss	    dec	   filename
	     13	      0	      0	     13	   kernel/rcupdate.o
	   2081	     81	      8	   2170	   kernel/rcutiny.o
				   ----
				   2183    Total

	CONFIG_TINY_RCU (non-preemptible)

	   text	   data	    bss	    dec	   filename
	     13	      0	      0	     13	   kernel/rcupdate.o
	    719	     25	      0	    744	   kernel/rcutiny.o
				    ---
				    757    Total

Requested-by: Loïc Minier <loic.minier@canonical.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 196ec02..d806735 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -59,6 +59,14 @@
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+/* Forward declarations for rcutiny_plugin.h. */
+static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+static void __call_rcu(struct rcu_head *head,
+		       void (*func)(struct rcu_head *rcu),
+		       struct rcu_ctrlblk *rcp);
+
+#include "rcutiny_plugin.h"
+
 #ifdef CONFIG_NO_HZ
 
 static long rcu_dynticks_nesting = 1;
@@ -140,6 +148,7 @@
 		rcu_sched_qs(cpu);
 	else if (!in_softirq())
 		rcu_bh_qs(cpu);
+	rcu_preempt_check_callbacks();
 }
 
 /*
@@ -162,6 +171,7 @@
 	*rcp->donetail = NULL;
 	if (rcp->curtail == rcp->donetail)
 		rcp->curtail = &rcp->rcucblist;
+	rcu_preempt_remove_callbacks(rcp);
 	rcp->donetail = &rcp->rcucblist;
 	local_irq_restore(flags);
 
@@ -182,6 +192,7 @@
 {
 	__rcu_process_callbacks(&rcu_sched_ctrlblk);
 	__rcu_process_callbacks(&rcu_bh_ctrlblk);
+	rcu_preempt_process_callbacks();
 }
 
 /*
@@ -223,15 +234,15 @@
 }
 
 /*
- * Post an RCU callback to be invoked after the end of an RCU grace
+ * Post an RCU callback to be invoked after the end of an RCU-sched grace
  * period.  But since we have but one CPU, that would be after any
  * quiescent state.
  */
-void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
 	__call_rcu(head, func, &rcu_sched_ctrlblk);
 }
-EXPORT_SYMBOL_GPL(call_rcu);
+EXPORT_SYMBOL_GPL(call_rcu_sched);
 
 /*
  * Post an RCU bottom-half callback to be invoked after any subsequent
@@ -243,20 +254,6 @@
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
 
-void rcu_barrier(void)
-{
-	struct rcu_synchronize rcu;
-
-	init_rcu_head_on_stack(&rcu.head);
-	init_completion(&rcu.completion);
-	/* Will wake me after RCU finished. */
-	call_rcu(&rcu.head, wakeme_after_rcu);
-	/* Wait for it. */
-	wait_for_completion(&rcu.completion);
-	destroy_rcu_head_on_stack(&rcu.head);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
 void rcu_barrier_bh(void)
 {
 	struct rcu_synchronize rcu;
@@ -289,5 +286,3 @@
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 }
-
-#include "rcutiny_plugin.h"