locking/ww_mutex: Optimize ww-mutexes by waking at most one waiter for backoff when acquiring the lock
The wait list is sorted by stamp order, and the only waiting task that may
have to back off is the first waiter with a context.
The regular slow path does not have to wake any other tasks at all, since
all other waiters that would have to back off were either woken up when
the waiter was added to the list, or detected the condition before they
added themselves.
Median timings taken of a contention-heavy GPU workload:
Without this series:
real 0m59.900s
user 0m7.516s
sys 2m16.076s
With changes up to and including this patch:
real 0m52.946s
user 0m7.272s
sys 1m55.964s
Signed-off-by: Nicolai Hähnle <Nicolai.Haehnle@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <dev@mblankhorst.nl>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dri-devel@lists.freedesktop.org
Link: http://lkml.kernel.org/r/1482346000-9927-9-git-send-email-nhaehnle@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 6696721..51e2ba5 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -289,6 +289,36 @@ __ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
}
/*
+ * Wake up any waiters that may have to back off when the lock is held by the
+ * given context.
+ *
+ * Due to the invariants on the wait list, this can only affect the first
+ * waiter with a context.
+ *
+ * The current task must not be on the wait list.
+ */
+static void __sched
+__ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex_waiter *cur;
+
+ lockdep_assert_held(&lock->wait_lock);
+
+ list_for_each_entry(cur, &lock->wait_list, list) {
+ if (!cur->ww_ctx)
+ continue;
+
+ if (cur->ww_ctx->acquired > 0 &&
+ __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
+ debug_mutex_wake_waiter(lock, cur);
+ wake_up_process(cur->task);
+ }
+
+ break;
+ }
+}
+
+/*
* After acquiring lock with fastpath or when we lost out in contested
* slowpath, set ctx and wake up any waiters so they can recheck.
*/
@@ -297,7 +327,6 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
unsigned long flags;
- struct mutex_waiter *cur;
ww_mutex_lock_acquired(lock, ctx);
@@ -323,16 +352,15 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
* so they can see the new lock->ctx.
*/
spin_lock_mutex(&lock->base.wait_lock, flags);
- list_for_each_entry(cur, &lock->base.wait_list, list) {
- debug_mutex_wake_waiter(&lock->base, cur);
- wake_up_process(cur->task);
- }
+ __ww_mutex_wakeup_for_backoff(&lock->base, ctx);
spin_unlock_mutex(&lock->base.wait_lock, flags);
}
/*
- * After acquiring lock in the slowpath set ctx and wake up any
- * waiters so they can recheck.
+ * After acquiring lock in the slowpath set ctx.
+ *
+ * Unlike for the fast path, the caller ensures that waiters are woken up where
+ * necessary.
*
* Callers must hold the mutex wait_lock.
*/
@@ -340,19 +368,8 @@ static __always_inline void
ww_mutex_set_context_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
- struct mutex_waiter *cur;
-
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;
-
- /*
- * Give any possible sleeping processes the chance to wake up,
- * so they can recheck if they have to back off.
- */
- list_for_each_entry(cur, &lock->base.wait_list, list) {
- debug_mutex_wake_waiter(&lock->base, cur);
- wake_up_process(cur->task);
- }
}
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
@@ -719,8 +736,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
/*
* After waiting to acquire the wait_lock, try again.
*/
- if (__mutex_trylock(lock))
+ if (__mutex_trylock(lock)) {
+ if (use_ww_ctx && ww_ctx)
+ __ww_mutex_wakeup_for_backoff(lock, ww_ctx);
+
goto skip_wait;
+ }
debug_mutex_lock_common(lock, &waiter);
debug_mutex_add_waiter(lock, &waiter, current);