Skip to content

Commit 11cb5c1

Browse files
Waiman-LongRezaT4795
authored andcommitted
locking/rwsem: Allow slowpath writer to ignore handoff bit if not set by first waiter
commit 6eebd5fb20838f5971ba17df9f55cc4f84a31053 upstream. With commit d257cc8 ("locking/rwsem: Make handoff bit handling more consistent"), the writer that sets the handoff bit can be interrupted out without clearing the bit if the wait queue isn't empty. This disables reader and writer optimistic lock spinning and stealing. Now if a non-first writer in the queue is somehow woken up or a new waiter enters the slowpath, it can't acquire the lock. This is not the case before commit d257cc8 as the writer that set the handoff bit will clear it when exiting out via the out_nolock path. This is less efficient as the busy rwsem stays in an unlock state for a longer time. In some cases, this new behavior may cause lockups as shown in [1] and [2]. This patch allows a non-first writer to ignore the handoff bit if it is not originally set or initiated by the first waiter. This patch is shown to be effective in fixing the lockup problem reported in [1]. [1] https://lore.kernel.org/lkml/20220617134325.GC30825@techsingularity.net/ [2] https://lore.kernel.org/lkml/3f02975c-1a9d-be20-32cf-f1d8e3dfafcc@oracle.com/ Fixes: d257cc8 ("locking/rwsem: Make handoff bit handling more consistent") Signed-off-by: Waiman Long <longman@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: John Donnelly <john.p.donnelly@oracle.com> Tested-by: Mel Gorman <mgorman@techsingularity.net> Link: https://lore.kernel.org/r/20220622200419.778799-1-longman@redhat.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent e679a2f commit 11cb5c1

File tree

1 file changed

+20
-10
lines changed

1 file changed

+20
-10
lines changed

kernel/locking/rwsem.c

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -335,8 +335,6 @@ struct rwsem_waiter {
335335
struct task_struct *task;
336336
enum rwsem_waiter_type type;
337337
unsigned long timeout;
338-
339-
/* Writer only, not initialized in reader */
340338
bool handoff_set;
341339
};
342340
#define rwsem_first_waiter(sem) \
@@ -456,10 +454,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
456454
* to give up the lock), request a HANDOFF to
457455
* force the issue.
458456
*/
459-
if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
460-
time_after(jiffies, waiter->timeout)) {
461-
adjustment -= RWSEM_FLAG_HANDOFF;
462-
lockevent_inc(rwsem_rlock_handoff);
457+
if (time_after(jiffies, waiter->timeout)) {
458+
if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
459+
adjustment -= RWSEM_FLAG_HANDOFF;
460+
lockevent_inc(rwsem_rlock_handoff);
461+
}
462+
waiter->handoff_set = true;
463463
}
464464

465465
atomic_long_add(-adjustment, &sem->count);
@@ -569,7 +569,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
569569
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
570570
struct rwsem_waiter *waiter)
571571
{
572-
bool first = rwsem_first_waiter(sem) == waiter;
572+
struct rwsem_waiter *first = rwsem_first_waiter(sem);
573573
long count, new;
574574

575575
lockdep_assert_held(&sem->wait_lock);
@@ -579,11 +579,20 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
579579
bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
580580

581581
if (has_handoff) {
582-
if (!first)
582+
/*
583+
* Honor handoff bit and yield only when the first
584+
* waiter is the one that set it. Otherwisee, we
585+
* still try to acquire the rwsem.
586+
*/
587+
if (first->handoff_set && (waiter != first))
583588
return false;
584589

585-
/* First waiter inherits a previously set handoff bit */
586-
waiter->handoff_set = true;
590+
/*
591+
* First waiter can inherit a previously set handoff
592+
* bit and spin on rwsem if lock acquisition fails.
593+
*/
594+
if (waiter == first)
595+
waiter->handoff_set = true;
587596
}
588597

589598
new = count;
@@ -978,6 +987,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
978987
waiter.task = current;
979988
waiter.type = RWSEM_WAITING_FOR_READ;
980989
waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
990+
waiter.handoff_set = false;
981991

982992
raw_spin_lock_irq(&sem->wait_lock);
983993
if (list_empty(&sem->wait_list)) {

0 commit comments

Comments
 (0)