Commit c45a4393 by Dmitry Vyukov Committed by Copybara-Service

absl: speed up Mutex::[Reader]TryLock

Tidy up Mutex::[Reader]TryLock codegen by outlining the slow path
and the non-tail function call, and by un-unrolling the loop.

Current codegen:
https://gist.githubusercontent.com/dvyukov/a4d353fd71ac873af9332c1340675b60/raw/226537ffa305b25a79ef3a85277fa870fee5191d/gistfile1.txt

New codegen:
https://gist.githubusercontent.com/dvyukov/686a094c5aa357025689764f155e5a29/raw/e3125c1cdb5669fac60faf336e2f60395e29d888/gistfile1.txt

name                                   old cpu/op   new cpu/op   delta
BM_TryLock                             18.0ns ± 0%  17.7ns ± 0%   -1.64%  (p=0.016 n=4+5)
BM_ReaderTryLock/real_time/threads:1   17.9ns ± 0%  17.9ns ± 0%   -0.10%  (p=0.016 n=5+5)
BM_ReaderTryLock/real_time/threads:72  9.61µs ± 8%  8.42µs ± 7%  -12.37%  (p=0.008 n=5+5)

PiperOrigin-RevId: 567006472
Change-Id: Iea0747e71bbf2dc1f00c70a4235203071d795b99
parent adcaae43
......@@ -1582,26 +1582,36 @@ bool Mutex::AwaitCommon(const Condition& cond, KernelTimeout t) {
bool Mutex::TryLock() {
ABSL_TSAN_MUTEX_PRE_LOCK(this, __tsan_mutex_try_lock);
intptr_t v = mu_.load(std::memory_order_relaxed);
if ((v & (kMuWriter | kMuReader | kMuEvent)) == 0 && // try fast acquire
mu_.compare_exchange_strong(v, kMuWriter | v, std::memory_order_acquire,
std::memory_order_relaxed)) {
DebugOnlyLockEnter(this);
ABSL_TSAN_MUTEX_POST_LOCK(this, __tsan_mutex_try_lock, 0);
return true;
}
if ((v & kMuEvent) != 0) { // we're recording events
if ((v & kExclusive->slow_need_zero) == 0 && // try fast acquire
mu_.compare_exchange_strong(
v, (kExclusive->fast_or | v) + kExclusive->fast_add,
std::memory_order_acquire, std::memory_order_relaxed)) {
// Try fast acquire.
if (ABSL_PREDICT_TRUE((v & (kMuWriter | kMuReader | kMuEvent)) == 0)) {
if (ABSL_PREDICT_TRUE(mu_.compare_exchange_strong(
v, kMuWriter | v, std::memory_order_acquire,
std::memory_order_relaxed))) {
DebugOnlyLockEnter(this);
PostSynchEvent(this, SYNCH_EV_TRYLOCK_SUCCESS);
ABSL_TSAN_MUTEX_POST_LOCK(this, __tsan_mutex_try_lock, 0);
return true;
} else {
PostSynchEvent(this, SYNCH_EV_TRYLOCK_FAILED);
}
} else if (ABSL_PREDICT_FALSE((v & kMuEvent) != 0)) {
// We're recording events.
return TryLockSlow();
}
ABSL_TSAN_MUTEX_POST_LOCK(
this, __tsan_mutex_try_lock | __tsan_mutex_try_lock_failed, 0);
return false;
}
// Out-of-line slow path for Mutex::TryLock().
//
// TryLock() branches here when its fast-path precondition does not hold and
// kMuEvent is set in the mutex word, i.e. events are being recorded.
// The function is marked ABSL_ATTRIBUTE_NOINLINE so that this code is kept
// out of the body of TryLock().
//
// Returns true if the lock was acquired, false otherwise. In both cases a
// SYNCH_EV_TRYLOCK_* event is posted and the matching TSan post-lock
// annotation is issued.
ABSL_ATTRIBUTE_NOINLINE bool Mutex::TryLockSlow() {
// Re-read the mutex word; the caller's snapshot is not passed in.
intptr_t v = mu_.load(std::memory_order_relaxed);
// Make a single CAS attempt, and only if none of the bits in
// kExclusive->slow_need_zero are set. If the CAS fails,
// compare_exchange_strong stores the observed value into v, but v is not
// consulted again: there is no retry.
if ((v & kExclusive->slow_need_zero) == 0 && // try fast acquire
mu_.compare_exchange_strong(
v, (kExclusive->fast_or | v) + kExclusive->fast_add,
std::memory_order_acquire, std::memory_order_relaxed)) {
// Acquired: record the successful attempt.
DebugOnlyLockEnter(this);
PostSynchEvent(this, SYNCH_EV_TRYLOCK_SUCCESS);
ABSL_TSAN_MUTEX_POST_LOCK(this, __tsan_mutex_try_lock, 0);
return true;
}
// Not acquired: record the failed attempt and pass the
// __tsan_mutex_try_lock_failed flag to the TSan annotation.
PostSynchEvent(this, SYNCH_EV_TRYLOCK_FAILED);
ABSL_TSAN_MUTEX_POST_LOCK(
this, __tsan_mutex_try_lock | __tsan_mutex_try_lock_failed, 0);
return false;
......@@ -1611,41 +1621,57 @@ bool Mutex::ReaderTryLock() {
ABSL_TSAN_MUTEX_PRE_LOCK(this,
__tsan_mutex_read_lock | __tsan_mutex_try_lock);
intptr_t v = mu_.load(std::memory_order_relaxed);
// Clang tends to unroll the loop when compiling with optimization.
// But in this case it just unnecessarily increases code size.
// If CAS is failing due to contention, the jump cost is negligible.
#if defined(__clang__)
#pragma nounroll
#endif
// The while-loops (here and below) iterate only if the mutex word keeps
// changing (typically because the reader count changes) under the CAS. We
// limit the number of attempts to avoid having to think about livelock.
int loop_limit = 5;
while ((v & (kMuWriter | kMuWait | kMuEvent)) == 0 && loop_limit != 0) {
if (mu_.compare_exchange_strong(v, (kMuReader | v) + kMuOne,
std::memory_order_acquire,
std::memory_order_relaxed)) {
// changing (typically because the reader count changes) under the CAS.
// We limit the number of attempts to avoid having to think about livelock.
for (int loop_limit = 5; loop_limit != 0; loop_limit--) {
if (ABSL_PREDICT_FALSE((v & (kMuWriter | kMuWait | kMuEvent)) != 0)) {
break;
}
if (ABSL_PREDICT_TRUE(mu_.compare_exchange_strong(
v, (kMuReader | v) + kMuOne, std::memory_order_acquire,
std::memory_order_relaxed))) {
DebugOnlyLockEnter(this);
ABSL_TSAN_MUTEX_POST_LOCK(
this, __tsan_mutex_read_lock | __tsan_mutex_try_lock, 0);
return true;
}
loop_limit--;
v = mu_.load(std::memory_order_relaxed);
}
if ((v & kMuEvent) != 0) { // we're recording events
loop_limit = 5;
while ((v & kShared->slow_need_zero) == 0 && loop_limit != 0) {
if (mu_.compare_exchange_strong(v, (kMuReader | v) + kMuOne,
std::memory_order_acquire,
std::memory_order_relaxed)) {
DebugOnlyLockEnter(this);
PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_SUCCESS);
ABSL_TSAN_MUTEX_POST_LOCK(
this, __tsan_mutex_read_lock | __tsan_mutex_try_lock, 0);
return true;
}
loop_limit--;
v = mu_.load(std::memory_order_relaxed);
}
if ((v & kMuEvent) != 0) {
PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_FAILED);
if (ABSL_PREDICT_TRUE((v & kMuEvent) == 0)) {
ABSL_TSAN_MUTEX_POST_LOCK(this,
__tsan_mutex_read_lock | __tsan_mutex_try_lock |
__tsan_mutex_try_lock_failed,
0);
return false;
}
// we're recording events
return ReaderTryLockSlow();
}
ABSL_ATTRIBUTE_NOINLINE bool Mutex::ReaderTryLockSlow() {
intptr_t v = mu_.load(std::memory_order_relaxed);
#if defined(__clang__)
#pragma nounroll
#endif
for (int loop_limit = 5; loop_limit != 0; loop_limit--) {
if ((v & kShared->slow_need_zero) == 0 &&
mu_.compare_exchange_strong(v, (kMuReader | v) + kMuOne,
std::memory_order_acquire,
std::memory_order_relaxed)) {
DebugOnlyLockEnter(this);
PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_SUCCESS);
ABSL_TSAN_MUTEX_POST_LOCK(
this, __tsan_mutex_read_lock | __tsan_mutex_try_lock, 0);
return true;
}
}
PostSynchEvent(this, SYNCH_EV_READERTRYLOCK_FAILED);
ABSL_TSAN_MUTEX_POST_LOCK(this,
__tsan_mutex_read_lock | __tsan_mutex_try_lock |
__tsan_mutex_try_lock_failed,
......
......@@ -521,6 +521,10 @@ class ABSL_LOCKABLE Mutex {
int flags) ABSL_ATTRIBUTE_COLD;
// slow path release
void UnlockSlow(SynchWaitParams* waitp) ABSL_ATTRIBUTE_COLD;
// TryLock slow path. TryLock() calls this when its fast-path precondition
// fails and kMuEvent is set in the mutex word (events are being recorded).
// Re-reads the mutex word, makes one compare-and-swap attempt to take the
// lock exclusively, and posts SYNCH_EV_TRYLOCK_SUCCESS or
// SYNCH_EV_TRYLOCK_FAILED accordingly. Returns true iff the lock was
// acquired.
bool TryLockSlow();
// ReaderTryLock slow path. ReaderTryLock() calls this when its fast path did
// not acquire the lock and kMuEvent is set in the mutex word (events are
// being recorded). Re-reads the mutex word and makes up to five attempts to
// compare-and-swap in a reader, each only while the kShared->slow_need_zero
// bits are clear. Posts SYNCH_EV_READERTRYLOCK_SUCCESS and returns true on
// success; posts SYNCH_EV_READERTRYLOCK_FAILED otherwise.
bool ReaderTryLockSlow();
// Common code between Await() and AwaitWithTimeout/Deadline()
bool AwaitCommon(const Condition& cond,
synchronization_internal::KernelTimeout t);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment