[PATCH] cpusets: swap migration interface
Add a boolean "memory_migrate" to each cpuset, represented by a file
containing "0" or "1" in each directory below /dev/cpuset.
It defaults to false (file contains "0"). It can be set true by writing
"1" to the file.
If true, then anytime that a task is attached to the cpuset so marked, the
pages of that task will be moved to that cpuset, preserving, to the extent
practical, the cpuset-relative placement of the pages.
Also anytime that a cpuset so marked has its memory placement changed (by
writing to its "mems" file), the tasks in that cpuset will have their pages
moved to the cpusets new nodes, preserving, to the extent practical, the
cpuset-relative placement of the moved pages.
Signed-off-by: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7430640..f63383e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -87,6 +87,7 @@
typedef enum {
CS_CPU_EXCLUSIVE,
CS_MEM_EXCLUSIVE,
+ CS_MEMORY_MIGRATE,
CS_REMOVED,
CS_NOTIFY_ON_RELEASE
} cpuset_flagbits_t;
@@ -112,6 +113,11 @@
return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
}
+static inline int is_memory_migrate(const struct cpuset *cs)
+{
+ return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags);
+}
+
/*
* Increment this atomic integer everytime any cpuset changes its
* mems_allowed value. Users of cpusets can track this generation
@@ -602,16 +608,24 @@
if (current->cpuset_mems_generation != my_cpusets_mem_gen) {
struct cpuset *cs;
nodemask_t oldmem = current->mems_allowed;
+ int migrate;
down(&callback_sem);
task_lock(current);
cs = current->cpuset;
+ migrate = is_memory_migrate(cs);
guarantee_online_mems(cs, ¤t->mems_allowed);
current->cpuset_mems_generation = cs->mems_generation;
task_unlock(current);
up(&callback_sem);
- if (!nodes_equal(oldmem, current->mems_allowed))
+ if (!nodes_equal(oldmem, current->mems_allowed)) {
numa_policy_rebind(&oldmem, ¤t->mems_allowed);
+ if (migrate) {
+ do_migrate_pages(current->mm, &oldmem,
+ ¤t->mems_allowed,
+ MPOL_MF_MOVE_ALL);
+ }
+ }
}
}
@@ -795,7 +809,7 @@
/*
* update_flag - read a 0 or a 1 in a file and update associated flag
* bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
- * CS_NOTIFY_ON_RELEASE)
+ * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE)
* cs: the cpuset to update
* buf: the buffer where we read the 0 or 1
*
@@ -848,6 +862,7 @@
struct task_struct *tsk;
struct cpuset *oldcs;
cpumask_t cpus;
+ nodemask_t from, to;
if (sscanf(pidbuf, "%d", &pid) != 1)
return -EIO;
@@ -893,7 +908,12 @@
guarantee_online_cpus(cs, &cpus);
set_cpus_allowed(tsk, cpus);
+ from = oldcs->mems_allowed;
+ to = cs->mems_allowed;
+
up(&callback_sem);
+ if (is_memory_migrate(cs))
+ do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
put_task_struct(tsk);
if (atomic_dec_and_test(&oldcs->count))
check_for_release(oldcs, ppathbuf);
@@ -905,6 +925,7 @@
typedef enum {
FILE_ROOT,
FILE_DIR,
+ FILE_MEMORY_MIGRATE,
FILE_CPULIST,
FILE_MEMLIST,
FILE_CPU_EXCLUSIVE,
@@ -960,6 +981,9 @@
case FILE_NOTIFY_ON_RELEASE:
retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
break;
+ case FILE_MEMORY_MIGRATE:
+ retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
+ break;
case FILE_TASKLIST:
retval = attach_task(cs, buffer, &pathbuf);
break;
@@ -1060,6 +1084,9 @@
case FILE_NOTIFY_ON_RELEASE:
*s++ = notify_on_release(cs) ? '1' : '0';
break;
+ case FILE_MEMORY_MIGRATE:
+ *s++ = is_memory_migrate(cs) ? '1' : '0';
+ break;
default:
retval = -EINVAL;
goto out;
@@ -1408,6 +1435,11 @@
.private = FILE_NOTIFY_ON_RELEASE,
};
+static struct cftype cft_memory_migrate = {
+ .name = "memory_migrate",
+ .private = FILE_MEMORY_MIGRATE,
+};
+
static int cpuset_populate_dir(struct dentry *cs_dentry)
{
int err;
@@ -1422,6 +1454,8 @@
return err;
if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
return err;
+ if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
+ return err;
if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
return err;
return 0;