[PATCH] files: files struct with RCU
Patch to eliminate struct files_struct.file_lock spinlock on the reader side
and use rcu refcounting rcuref_xxx api for the f_count refcounter. The
updates to the fdtable are done by allocating a new fdtable structure and
setting files->fdt to point to the new structure. The fdtable structure is
protected by RCU thereby allowing lock-free lookup. For fd arrays/sets that
are vmalloced, we use keventd to free them since RCU callbacks can't sleep. A
global list of fdtable to be freed is not scalable, so we use a per-cpu list.
If keventd is already handling the current cpu's work, we use a timer to defer
queueing of that work.
Since the last publication, this patch has been re-written to avoid using
explicit memory barriers and use rcu_assign_pointer(), rcu_dereference()
premitives instead. This required that the fd information is kept in a
separate structure (fdtable) and updated atomically.
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/fs/file_table.c b/fs/file_table.c
index 43e9e17..86ec8ae 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/eventpoll.h>
+#include <linux/rcupdate.h>
#include <linux/mount.h>
#include <linux/cdev.h>
#include <linux/fsnotify.h>
@@ -53,9 +54,15 @@
spin_unlock_irqrestore(&filp_count_lock, flags);
}
+static inline void file_free_rcu(struct rcu_head *head)
+{
+ struct file *f = container_of(head, struct file, f_rcuhead);
+ kmem_cache_free(filp_cachep, f);
+}
+
static inline void file_free(struct file *f)
{
- kmem_cache_free(filp_cachep, f);
+ call_rcu(&f->f_rcuhead, file_free_rcu);
}
/* Find an unused file structure and return a pointer to it.
@@ -110,7 +117,7 @@
void fastcall fput(struct file *file)
{
- if (atomic_dec_and_test(&file->f_count))
+ if (rcuref_dec_and_test(&file->f_count))
__fput(file);
}
@@ -156,11 +163,17 @@
struct file *file;
struct files_struct *files = current->files;
- spin_lock(&files->file_lock);
+ rcu_read_lock();
file = fcheck_files(files, fd);
- if (file)
- get_file(file);
- spin_unlock(&files->file_lock);
+ if (file) {
+ if (!rcuref_inc_lf(&file->f_count)) {
+ /* File object ref couldn't be taken */
+ rcu_read_unlock();
+ return NULL;
+ }
+ }
+ rcu_read_unlock();
+
return file;
}
@@ -182,21 +195,25 @@
if (likely((atomic_read(&files->count) == 1))) {
file = fcheck_files(files, fd);
} else {
- spin_lock(&files->file_lock);
+ rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
- get_file(file);
- *fput_needed = 1;
+ if (rcuref_inc_lf(&file->f_count))
+ *fput_needed = 1;
+ else
+ /* Didn't get the reference, someone's freed */
+ file = NULL;
}
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
}
+
return file;
}
void put_filp(struct file *file)
{
- if (atomic_dec_and_test(&file->f_count)) {
+ if (rcuref_dec_and_test(&file->f_count)) {
security_file_free(file);
file_kill(file);
file_free(file);
@@ -257,4 +274,5 @@
files_stat.max_files = n;
if (files_stat.max_files < NR_FILE)
files_stat.max_files = NR_FILE;
+ files_defer_init();
}