Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
  [PATCH] fix sysctl_nr_open bugs
  [PATCH] sanitize anon_inode_getfd()
  [PATCH] split linux/file.h
  [PATCH] make osf_select() use core_sys_select()
  [PATCH] remove horrors with irix tty ioctls handling
  [PATCH] fix file and descriptor handling in perfmon
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 9fee37e..32ca1b9 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -981,27 +981,18 @@
 osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
 	   struct timeval32 __user *tvp)
 {
-	fd_set_bits fds;
-	char *bits;
-	size_t size;
-	long timeout;
-	int ret = -EINVAL;
-	struct fdtable *fdt;
-	int max_fds;
-
-	timeout = MAX_SCHEDULE_TIMEOUT;
+	s64 timeout = MAX_SCHEDULE_TIMEOUT;
 	if (tvp) {
 		time_t sec, usec;
 
 		if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp))
 		    || __get_user(sec, &tvp->tv_sec)
 		    || __get_user(usec, &tvp->tv_usec)) {
-		    	ret = -EFAULT;
-			goto out_nofds;
+		    	return -EFAULT;
 		}
 
 		if (sec < 0 || usec < 0)
-			goto out_nofds;
+			return -EINVAL;
 
 		if ((unsigned long) sec < MAX_SELECT_SECONDS) {
 			timeout = (usec + 1000000/HZ - 1) / (1000000/HZ);
@@ -1009,60 +1000,8 @@
 		}
 	}
 
-	rcu_read_lock();
-	fdt = files_fdtable(current->files);
-	max_fds = fdt->max_fds;
-	rcu_read_unlock();
-	if (n < 0 || n > max_fds)
-		goto out_nofds;
-
-	/*
-	 * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
-	 * since we used fdset we need to allocate memory in units of
-	 * long-words. 
-	 */
-	ret = -ENOMEM;
-	size = FDS_BYTES(n);
-	bits = kmalloc(6 * size, GFP_KERNEL);
-	if (!bits)
-		goto out_nofds;
-	fds.in      = (unsigned long *)  bits;
-	fds.out     = (unsigned long *) (bits +   size);
-	fds.ex      = (unsigned long *) (bits + 2*size);
-	fds.res_in  = (unsigned long *) (bits + 3*size);
-	fds.res_out = (unsigned long *) (bits + 4*size);
-	fds.res_ex  = (unsigned long *) (bits + 5*size);
-
-	if ((ret = get_fd_set(n, inp->fds_bits, fds.in)) ||
-	    (ret = get_fd_set(n, outp->fds_bits, fds.out)) ||
-	    (ret = get_fd_set(n, exp->fds_bits, fds.ex)))
-		goto out;
-	zero_fd_set(n, fds.res_in);
-	zero_fd_set(n, fds.res_out);
-	zero_fd_set(n, fds.res_ex);
-
-	ret = do_select(n, &fds, &timeout);
-
 	/* OSF does not copy back the remaining time.  */
-
-	if (ret < 0)
-		goto out;
-	if (!ret) {
-		ret = -ERESTARTNOHAND;
-		if (signal_pending(current))
-			goto out;
-		ret = 0;
-	}
-
-	if (set_fd_set(n, inp->fds_bits, fds.res_in) ||
-	    set_fd_set(n, outp->fds_bits, fds.res_out) ||
-	    set_fd_set(n, exp->fds_bits, fds.res_ex))
-		ret = -EFAULT;
-
- out:
-	kfree(bits);
- out_nofds:
-	return ret;
+	return core_sys_select(n, inp, outp, exp, &timeout);
 }
 
 struct rusage32 {
diff --git a/arch/mips/kernel/irixioctl.c b/arch/mips/kernel/irixioctl.c
index 2bde200d..b39bdba 100644
--- a/arch/mips/kernel/irixioctl.c
+++ b/arch/mips/kernel/irixioctl.c
@@ -27,33 +27,6 @@
 	cc_t c_cc[NCCS];
 };
 
-extern void start_tty(struct tty_struct *tty);
-static struct tty_struct *get_tty(int fd)
-{
-	struct file *filp;
-	struct tty_struct *ttyp = NULL;
-
-	rcu_read_lock();
-	filp = fcheck(fd);
-	if(filp && filp->private_data) {
-		ttyp = (struct tty_struct *) filp->private_data;
-
-		if(ttyp->magic != TTY_MAGIC)
-			ttyp =NULL;
-	}
-	rcu_read_unlock();
-	return ttyp;
-}
-
-static struct tty_struct *get_real_tty(struct tty_struct *tp)
-{
-	if (tp->driver->type == TTY_DRIVER_TYPE_PTY &&
-	   tp->driver->subtype == PTY_TYPE_MASTER)
-		return tp->link;
-	else
-		return tp;
-}
-
 asmlinkage int irix_ioctl(int fd, unsigned long cmd, unsigned long arg)
 {
 	struct tty_struct *tp, *rtp;
@@ -146,34 +119,24 @@
 		error = sys_ioctl(fd, TIOCNOTTY, arg);
 		break;
 
-	case 0x00007416:
+	case 0x00007416: {
+		pid_t pid;
 #ifdef DEBUG_IOCTLS
 		printk("TIOCGSID, %08lx) ", arg);
 #endif
-		tp = get_tty(fd);
-		if(!tp) {
-			error = -EINVAL;
-			break;
-		}
-		rtp = get_real_tty(tp);
-#ifdef DEBUG_IOCTLS
-		printk("rtp->session=%d ", rtp->session);
-#endif
-		error = put_user(rtp->session, (unsigned long __user *) arg);
+		old_fs = get_fs(); set_fs(get_ds());
+		error = sys_ioctl(fd, TIOCGSID, (unsigned long)&pid);
+		set_fs(old_fs);
+		if (!error)
+			error = put_user(pid, (unsigned long __user *) arg);
 		break;
-
+	}
 	case 0x746e:
 		/* TIOCSTART, same effect as hitting ^Q */
 #ifdef DEBUG_IOCTLS
 		printk("TIOCSTART, %08lx) ", arg);
 #endif
-		tp = get_tty(fd);
-		if(!tp) {
-			error = -EINVAL;
-			break;
-		}
-		rtp = get_real_tty(tp);
-		start_tty(rtp);
+		error = sys_ioctl(fd, TCXONC, TCOON);
 		break;
 
 	case 0x20006968:
diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c
index 998c4ef..ceb62dc 100644
--- a/arch/mips/kernel/kspd.c
+++ b/arch/mips/kernel/kspd.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/unistd.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/fs.h>
 #include <linux/syscalls.h>
 #include <linux/workqueue.h>
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index b962c3a..af116aa 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -22,6 +22,7 @@
 
 #include <linux/elf.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/fs.h>
 #include <linux/list.h>
 #include <linux/module.h>
diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c
index 6342b05..3582f43 100644
--- a/drivers/char/tty_audit.c
+++ b/drivers/char/tty_audit.c
@@ -11,6 +11,7 @@
 
 #include <linux/audit.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/tty.h>
 
 struct tty_audit_buf {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 1d298c2..49c1a22 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -78,6 +78,7 @@
 #include <linux/tty_flip.h>
 #include <linux/devpts_fs.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/console.h>
 #include <linux/timer.h>
 #include <linux/ctype.h>
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f42be06..977ef20 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -57,9 +57,6 @@
  *                    anonymous inode, and a dentry that describe the "class"
  *                    of the file
  *
- * @pfd:     [out]   pointer to the file descriptor
- * @dpinode: [out]   pointer to the inode
- * @pfile:   [out]   pointer to the file struct
  * @name:    [in]    name of the "class" of the new file
  * @fops     [in]    file operations for the new file
  * @priv     [in]    private data for the new file (will be file's private_data)
@@ -68,10 +65,9 @@
  * that do not need to have a full-fledged inode in order to operate correctly.
  * All the files created with anon_inode_getfd() will share a single inode,
  * hence saving memory and avoiding code duplication for the file/inode/dentry
- * setup.
+ * setup.  Returns new descriptor or -error.
  */
-int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
-		     const char *name, const struct file_operations *fops,
+int anon_inode_getfd(const char *name, const struct file_operations *fops,
 		     void *priv)
 {
 	struct qstr this;
@@ -125,10 +121,7 @@
 
 	fd_install(fd, file);
 
-	*pfd = fd;
-	*pinode = anon_inode_inode;
-	*pfile = file;
-	return 0;
+	return fd;
 
 err_dput:
 	dput(dentry);
diff --git a/fs/compat.c b/fs/compat.c
index 139dc93..332a869 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -24,6 +24,7 @@
 #include <linux/fcntl.h>
 #include <linux/namei.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/vfs.h>
 #include <linux/ioctl.h>
 #include <linux/init.h>
diff --git a/fs/dnotify.c b/fs/dnotify.c
index eaecc4c..676073b 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -20,7 +20,7 @@
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
-#include <linux/file.h>
+#include <linux/fdtable.h>
 
 int dir_notify_enable __read_mostly = 1;
 
diff --git a/fs/eventfd.c b/fs/eventfd.c
index a9f130c..343942d 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -200,10 +200,8 @@
 
 asmlinkage long sys_eventfd(unsigned int count)
 {
-	int error, fd;
+	int fd;
 	struct eventfd_ctx *ctx;
-	struct file *file;
-	struct inode *inode;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -216,12 +214,9 @@
 	 * When we call this, the initialization must be complete, since
 	 * anon_inode_getfd() will install the fd.
 	 */
-	error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]",
-				 &eventfd_fops, ctx);
-	if (!error)
-		return fd;
-
-	kfree(ctx);
-	return error;
+	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
+	if (fd < 0)
+		kfree(ctx);
+	return fd;
 }
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 221086f..990c01d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1050,8 +1050,6 @@
 {
 	int error, fd = -1;
 	struct eventpoll *ep;
-	struct inode *inode;
-	struct file *file;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
 		     current, size));
@@ -1061,29 +1059,24 @@
 	 * structure ( "struct eventpoll" ).
 	 */
 	error = -EINVAL;
-	if (size <= 0 || (error = ep_alloc(&ep)) != 0)
+	if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
+		fd = error;
 		goto error_return;
+	}
 
 	/*
 	 * Creates all the items needed to setup an eventpoll file. That is,
-	 * a file structure, and inode and a free file descriptor.
+	 * a file structure and a free file descriptor.
 	 */
-	error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]",
-				 &eventpoll_fops, ep);
-	if (error)
-		goto error_free;
+	fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
+	if (fd < 0)
+		ep_free(ep);
 
+error_return:
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
 		     current, size, fd));
 
 	return fd;
-
-error_free:
-	ep_free(ep);
-error_return:
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
-		     current, size, error));
-	return error;
 }
 
 /*
diff --git a/fs/exec.c b/fs/exec.c
index 9f9f931..aeaa979 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -24,6 +24,7 @@
 
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/mman.h>
 #include <linux/a.out.h>
 #include <linux/stat.h>
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3f3ac63..bfd7765 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/capability.h>
 #include <linux/dnotify.h>
 #include <linux/smp_lock.h>
diff --git a/fs/file.c b/fs/file.c
index 5110acb1..4c6f0ea 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/bitops.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
@@ -149,8 +150,16 @@
 	nr /= (1024 / sizeof(struct file *));
 	nr = roundup_pow_of_two(nr + 1);
 	nr *= (1024 / sizeof(struct file *));
-	if (nr > sysctl_nr_open)
-		nr = sysctl_nr_open;
+	/*
+	 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
+	 * had been set lower between the check in expand_files() and here.  Deal
+	 * with that in caller, it's cheaper that way.
+	 *
+	 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
+	 * bitmaps handling below becomes unpleasant, to put it mildly...
+	 */
+	if (unlikely(nr > sysctl_nr_open))
+		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
 
 	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
 	if (!fdt)
@@ -199,6 +208,16 @@
 	if (!new_fdt)
 		return -ENOMEM;
 	/*
+	 * extremely unlikely race - sysctl_nr_open decreased between the check in
+	 * caller and alloc_fdtable().  Cheaper to catch it here...
+	 */
+	if (unlikely(new_fdt->max_fds <= nr)) {
+		free_fdarr(new_fdt);
+		free_fdset(new_fdt);
+		kfree(new_fdt);
+		return -EMFILE;
+	}
+	/*
 	 * Check again since another task may have expanded the fd table while
 	 * we dropped the lock
 	 */
diff --git a/fs/file_table.c b/fs/file_table.c
index 7a0a9b8..8308422 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -8,6 +8,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/fs.h>
diff --git a/fs/locks.c b/fs/locks.c
index 44d9a6a..663c069 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -116,6 +116,7 @@
 
 #include <linux/capability.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/module.h>
diff --git a/fs/open.c b/fs/open.c
index 7af1f05..a145008 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -7,6 +7,7 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/quotaops.h>
 #include <linux/fsnotify.h>
 #include <linux/module.h>
diff --git a/fs/proc/array.c b/fs/proc/array.c
index c135cbd..dca997a 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -73,6 +73,7 @@
 #include <linux/signal.h>
 #include <linux/highmem.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/times.h>
 #include <linux/cpuset.h>
 #include <linux/rcupdate.h>
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fcf02f2..808cbdc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -56,6 +56,7 @@
 #include <linux/init.h>
 #include <linux/capability.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/string.h>
 #include <linux/seq_file.h>
 #include <linux/namei.h>
diff --git a/fs/select.c b/fs/select.c
index 2c29214..8dda969 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -21,6 +21,7 @@
 #include <linux/poll.h>
 #include <linux/personality.h> /* for STICKY_TIMEOUTS */
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
 
@@ -298,7 +299,7 @@
 #define MAX_SELECT_SECONDS \
 	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
 
-static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
+int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			   fd_set __user *exp, s64 *timeout)
 {
 	fd_set_bits fds;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 8ead0db..6197256 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -207,11 +207,8 @@
 
 asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
 {
-	int error;
 	sigset_t sigmask;
 	struct signalfd_ctx *ctx;
-	struct file *file;
-	struct inode *inode;
 
 	if (sizemask != sizeof(sigset_t) ||
 	    copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -230,12 +227,11 @@
 		 * When we call this, the initialization must be complete, since
 		 * anon_inode_getfd() will install the fd.
 		 */
-		error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]",
-					 &signalfd_fops, ctx);
-		if (error)
-			goto err_fdalloc;
+		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
+		if (ufd < 0)
+			kfree(ctx);
 	} else {
-		file = fget(ufd);
+		struct file *file = fget(ufd);
 		if (!file)
 			return -EBADF;
 		ctx = file->private_data;
@@ -252,9 +248,4 @@
 	}
 
 	return ufd;
-
-err_fdalloc:
-	kfree(ctx);
-	return error;
 }
-
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 5400524..d87d354 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -181,10 +181,8 @@
 
 asmlinkage long sys_timerfd_create(int clockid, int flags)
 {
-	int error, ufd;
+	int ufd;
 	struct timerfd_ctx *ctx;
-	struct file *file;
-	struct inode *inode;
 
 	if (flags)
 		return -EINVAL;
@@ -200,12 +198,9 @@
 	ctx->clockid = clockid;
 	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
 
-	error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
-				 &timerfd_fops, ctx);
-	if (error) {
+	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
+	if (ufd < 0)
 		kfree(ctx);
-		return error;
-	}
 
 	return ufd;
 }
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index b2e1ba3..6129e58 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -8,8 +8,7 @@
 #ifndef _LINUX_ANON_INODES_H
 #define _LINUX_ANON_INODES_H
 
-int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
-		     const char *name, const struct file_operations *fops,
+int anon_inode_getfd(const char *name, const struct file_operations *fops,
 		     void *priv);
 
 #endif /* _LINUX_ANON_INODES_H */
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
new file mode 100644
index 0000000..a118f3c
--- /dev/null
+++ b/include/linux/fdtable.h
@@ -0,0 +1,99 @@
+/*
+ * descriptor table internals; you almost certainly want file.h instead.
+ */
+
+#ifndef __LINUX_FDTABLE_H
+#define __LINUX_FDTABLE_H
+
+#include <asm/atomic.h>
+#include <linux/posix_types.h>
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+
+/*
+ * The default fd array needs to be at least BITS_PER_LONG,
+ * as this is the granularity returned by copy_fdset().
+ */
+#define NR_OPEN_DEFAULT BITS_PER_LONG
+
+/*
+ * The embedded_fd_set is a small fd_set,
+ * suitable for most tasks (which open <= BITS_PER_LONG files)
+ */
+struct embedded_fd_set {
+	unsigned long fds_bits[1];
+};
+
+struct fdtable {
+	unsigned int max_fds;
+	struct file ** fd;      /* current fd array */
+	fd_set *close_on_exec;
+	fd_set *open_fds;
+	struct rcu_head rcu;
+	struct fdtable *next;
+};
+
+/*
+ * Open file table structure
+ */
+struct files_struct {
+  /*
+   * read mostly part
+   */
+	atomic_t count;
+	struct fdtable *fdt;
+	struct fdtable fdtab;
+  /*
+   * written part on a separate cache line in SMP
+   */
+	spinlock_t file_lock ____cacheline_aligned_in_smp;
+	int next_fd;
+	struct embedded_fd_set close_on_exec_init;
+	struct embedded_fd_set open_fds_init;
+	struct file * fd_array[NR_OPEN_DEFAULT];
+};
+
+#define files_fdtable(files) (rcu_dereference((files)->fdt))
+
+extern struct kmem_cache *filp_cachep;
+
+struct file_operations;
+struct vfsmount;
+struct dentry;
+
+extern int expand_files(struct files_struct *, int nr);
+extern void free_fdtable_rcu(struct rcu_head *rcu);
+extern void __init files_defer_init(void);
+
+static inline void free_fdtable(struct fdtable *fdt)
+{
+	call_rcu(&fdt->rcu, free_fdtable_rcu);
+}
+
+static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
+{
+	struct file * file = NULL;
+	struct fdtable *fdt = files_fdtable(files);
+
+	if (fd < fdt->max_fds)
+		file = rcu_dereference(fdt->fd[fd]);
+	return file;
+}
+
+/*
+ * Check whether the specified fd has an open file.
+ */
+#define fcheck(fd)	fcheck_files(current->files, fd)
+
+struct task_struct;
+
+struct files_struct *get_files_struct(struct task_struct *);
+void put_files_struct(struct files_struct *fs);
+void reset_files_struct(struct files_struct *);
+int unshare_files(struct files_struct **);
+
+extern struct kmem_cache *files_cachep;
+
+#endif /* __LINUX_FDTABLE_H */
diff --git a/include/linux/file.h b/include/linux/file.h
index 69baf5a4..27c64bd 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -5,59 +5,11 @@
 #ifndef __LINUX_FILE_H
 #define __LINUX_FILE_H
 
-#include <asm/atomic.h>
-#include <linux/posix_types.h>
 #include <linux/compiler.h>
-#include <linux/spinlock.h>
-#include <linux/rcupdate.h>
 #include <linux/types.h>
+#include <linux/posix_types.h>
 
-/*
- * The default fd array needs to be at least BITS_PER_LONG,
- * as this is the granularity returned by copy_fdset().
- */
-#define NR_OPEN_DEFAULT BITS_PER_LONG
-
-/*
- * The embedded_fd_set is a small fd_set,
- * suitable for most tasks (which open <= BITS_PER_LONG files)
- */
-struct embedded_fd_set {
-	unsigned long fds_bits[1];
-};
-
-struct fdtable {
-	unsigned int max_fds;
-	struct file ** fd;      /* current fd array */
-	fd_set *close_on_exec;
-	fd_set *open_fds;
-	struct rcu_head rcu;
-	struct fdtable *next;
-};
-
-/*
- * Open file table structure
- */
-struct files_struct {
-  /*
-   * read mostly part
-   */
-	atomic_t count;
-	struct fdtable *fdt;
-	struct fdtable fdtab;
-  /*
-   * written part on a separate cache line in SMP
-   */
-	spinlock_t file_lock ____cacheline_aligned_in_smp;
-	int next_fd;
-	struct embedded_fd_set close_on_exec_init;
-	struct embedded_fd_set open_fds_init;
-	struct file * fd_array[NR_OPEN_DEFAULT];
-};
-
-#define files_fdtable(files) (rcu_dereference((files)->fdt))
-
-extern struct kmem_cache *filp_cachep;
+struct file;
 
 extern void __fput(struct file *);
 extern void fput(struct file *);
@@ -85,41 +37,7 @@
 extern int get_unused_fd(void);
 extern int get_unused_fd_flags(int flags);
 extern void put_unused_fd(unsigned int fd);
-struct kmem_cache;
-
-extern int expand_files(struct files_struct *, int nr);
-extern void free_fdtable_rcu(struct rcu_head *rcu);
-extern void __init files_defer_init(void);
-
-static inline void free_fdtable(struct fdtable *fdt)
-{
-	call_rcu(&fdt->rcu, free_fdtable_rcu);
-}
-
-static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
-{
-	struct file * file = NULL;
-	struct fdtable *fdt = files_fdtable(files);
-
-	if (fd < fdt->max_fds)
-		file = rcu_dereference(fdt->fd[fd]);
-	return file;
-}
-
-/*
- * Check whether the specified fd has an open file.
- */
-#define fcheck(fd)	fcheck_files(current->files, fd)
 
 extern void fd_install(unsigned int fd, struct file *file);
 
-struct task_struct;
-
-struct files_struct *get_files_struct(struct task_struct *);
-void put_files_struct(struct files_struct *fs);
-void reset_files_struct(struct files_struct *);
-int unshare_files(struct files_struct **);
-
-extern struct kmem_cache *files_cachep;
-
 #endif /* __LINUX_FILE_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index bf6b8a6..b24c287 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX__INIT_TASK_H
 #define _LINUX__INIT_TASK_H
 
-#include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/rcupdate.h>
 #include <linux/irqflags.h>
 #include <linux/utsname.h>
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 16d813b..ef45382 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -117,6 +117,8 @@
 extern int do_select(int n, fd_set_bits *fds, s64 *timeout);
 extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds,
 		       s64 *timeout);
+extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
+			   fd_set __user *exp, s64 *timeout);
 
 #endif /* KERNEL */
 
diff --git a/kernel/exit.c b/kernel/exit.c
index d3ad546..1510f78 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -19,6 +19,7 @@
 #include <linux/acct.h>
 #include <linux/tsacct_kern.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/binfmts.h>
 #include <linux/nsproxy.h>
 #include <linux/pid_namespace.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 2bb675a..933e60e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -22,6 +22,7 @@
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/key.h>
 #include <linux/binfmts.h>
 #include <linux/mman.h>
diff --git a/kernel/kmod.c b/kernel/kmod.c
index e276404..8df97d3 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -27,6 +27,7 @@
 #include <linux/mnt_namespace.h>
 #include <linux/completion.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
 #include <linux/mount.h>
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 1b50a6e..1c864c0 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -39,6 +39,7 @@
 #include <linux/spinlock.h>
 #include <linux/syscalls.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/ext2_fs.h>
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c82cf15..e89338e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -834,16 +834,9 @@
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-	int fd, r;
-	struct inode *inode;
-	struct file *file;
-
-	r = anon_inode_getfd(&fd, &inode, &file,
-			     "kvm-vcpu", &kvm_vcpu_fops, vcpu);
-	if (r) {
+	int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu);
+	if (fd < 0)
 		kvm_put_kvm(vcpu->kvm);
-		return r;
-	}
 	return fd;
 }
 
@@ -1168,19 +1161,15 @@
 
 static int kvm_dev_ioctl_create_vm(void)
 {
-	int fd, r;
-	struct inode *inode;
-	struct file *file;
+	int fd;
 	struct kvm *kvm;
 
 	kvm = kvm_create_vm();
 	if (IS_ERR(kvm))
 		return PTR_ERR(kvm);
-	r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
-	if (r) {
+	fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm);
+	if (fd < 0)
 		kvm_put_kvm(kvm);
-		return r;
-	}
 
 	return fd;
 }