Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
29f59ed
threading: lock-free fast path for SemaphoreSlim.WaitAsync
thomhurst Mar 11, 2026
7ca90a6
Fix Release() race with WaitAsync CAS fast path
thomhurst Mar 11, 2026
0128aed
Fix double-grant race in WaitAsyncCore slow path
thomhurst Mar 11, 2026
e5c8ae1
Fix double-grant race in WaitCore
thomhurst Mar 11, 2026
78503b5
Remove stale Debug.Assert and add AvailableWaitHandle concurrent-init…
thomhurst Apr 4, 2026
3e9f436
Fix spurious SemaphoreFullException race in Release() under concurren…
thomhurst Apr 4, 2026
a03375d
Fix misleading comment and British spelling in WaitAsyncCore fast path
thomhurst Apr 4, 2026
4493eb9
Add concurrent fast-path and WaitCore/WaitAsync stress tests for Sema…
thomhurst Apr 4, 2026
5129c72
Remove SemaphoreSlimCas proxy — benchmarks now measure the real patch…
thomhurst Apr 4, 2026
8a9178c
Wrap AvailableWaitHandle stress test loop in try/finally to ensure ac…
thomhurst Apr 4, 2026
97032cd
Extract TryDecrementCount helper, add stress tests and benchmarks
thomhurst Apr 4, 2026
6b4b994
Add SpinWait backoff to AvailableWaitHandle stress test accessor loop
thomhurst Apr 4, 2026
5188767
Address PR feedback: fix Task<bool> typing, remove orphaned benchmark…
thomhurst Apr 27, 2026
56f36e9
Drop redundant netCount comment
thomhurst Apr 27, 2026
730521e
Harden lock-free fast path against memory-model and ordering hazards
thomhurst Apr 27, 2026
3b616ce
Move WaitCore retry-loop locals inside the loop
thomhurst Apr 27, 2026
60419be
Refactor Release_BulkRelease test to avoid two-phase deadlock
thomhurst Apr 28, 2026
8fb1fa9
Address Copilot review: linearizable Release check, real cancellation…
thomhurst Jun 10, 2026
649efb6
Simplify async-handoff loop in Release to a single counter
thomhurst Jun 10, 2026
84d733f
Fix Release transient count bump stealing async-waiter permits
thomhurst Jun 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ public class SemaphoreSlim : IDisposable
// The number of synchronously waiting threads. It is set to zero in the constructor, incremented before a
// thread blocks, and decremented again once that thread stops waiting. It is used as a flag so the release
// call knows whether there are waiting threads in the monitor.
private int m_waitCount;
// Volatile so the lock-free WaitAsync fast path observes the increment via release/acquire pairing rather than
// depending on the lock release of the writer (which the fast path bypasses).
private volatile int m_waitCount;

/// <summary>
/// This is used to help prevent waking more waiters than necessary. It's not perfect and sometimes more waiters than
Expand All @@ -57,7 +59,9 @@ public class SemaphoreSlim : IDisposable
private volatile ManualResetEvent? m_waitHandle;

// Head of list representing asynchronous waits on the semaphore.
private TaskNode? m_asyncHead;
// Volatile for the same reason as m_waitCount: the lock-free WaitAsync fast path reads it without the lock
// and must see writes published by the lock-holding enqueue/dequeue paths.
private volatile TaskNode? m_asyncHead;

// Tail of list representing asynchronous waits on the semaphore.
private TaskNode? m_asyncTail;
Expand Down Expand Up @@ -106,9 +110,23 @@ public WaitHandle AvailableWaitHandle
// lock the count to avoid multiple threads initializing the handle if it is null
lock (m_lockObjAndDisposed)
{
// The initial state for the wait handle is true if the count is greater than zero
// false otherwise
m_waitHandle ??= new ManualResetEvent(m_currentCount != 0);
if (m_waitHandle is null)
{
// Publish the handle in the unsignaled state first, then reflect the current count.
// Once m_waitHandle is non-null, the lock-free WaitAsync fast path is excluded (it gates
// on m_waitHandle being null). The barrier prevents the m_currentCount read from being
// reordered before the publish on weakly-ordered architectures: without it, a concurrent
// fast-path CAS that already happened could be missed here, leaving the handle Set when
// count == 0. Any fast path that completes between the publish and the count read is
// covered by its own post-CAS recovery branch.
var handle = new ManualResetEvent(false);
m_waitHandle = handle;
Interlocked.MemoryBarrier();
if (m_currentCount > 0)
{
handle.Set();
}
}
}
}

Expand Down Expand Up @@ -373,42 +391,49 @@ private bool WaitCore(long millisecondsTimeout, CancellationToken cancellationTo
// There are no async waiters, so we can proceed with normal synchronous waiting.
else
{
// If the count > 0 we are good to move on.
// If not, then wait if we were allowed some wait duration

OperationCanceledException? oce = null;

if (m_currentCount == 0)
// Loop to handle the case where the lock-free WaitAsync fast path raced and decremented the
// count between our wait/check and TryDecrementCount. With m_waitCount visibly > 0 the fast
// path defers, so the loop typically runs once; the residual race during m_waitCount's
// publication makes the retry necessary for correctness.
while (true)
{
if (millisecondsTimeout == 0)
OperationCanceledException? oce = null;
bool timedOut = false;
if (m_currentCount == 0)
{
return false;
if (millisecondsTimeout == 0)
{
return false;
}

// Prepare for the main wait...
// wait until the count becomes greater than zero or the timeout is expired
try
{
timedOut = !WaitUntilCountOrTimeout(millisecondsTimeout, startTime, cancellationToken);
}
Comment on lines +409 to +414
catch (OperationCanceledException e) { oce = e; }
}

// Prepare for the main wait...
// wait until the count becomes greater than zero or the timeout is expired
try
// Now try to acquire. We prioritize acquisition over cancellation/timeout so that we don't
// lose any counts when there are asynchronous waiters in the mix. Asynchronous waiters
// defer to synchronous waiters in priority, which means that if it's possible an asynchronous
// waiter didn't get released because a synchronous waiter was present, we need to ensure
// that synchronous waiter succeeds so that they have a chance to release.
if (TryDecrementCount() > 0)
{
waitSuccessful = WaitUntilCountOrTimeout(millisecondsTimeout, startTime, cancellationToken);
waitSuccessful = true;
break;
}
catch (OperationCanceledException e) { oce = e; }
}

// Now try to acquire. We prioritize acquisition over cancellation/timeout so that we don't
// lose any counts when there are asynchronous waiters in the mix. Asynchronous waiters
// defer to synchronous waiters in priority, which means that if it's possible an asynchronous
// waiter didn't get released because a synchronous waiter was present, we need to ensure
// that synchronous waiter succeeds so that they have a chance to release.
Debug.Assert(!waitSuccessful || m_currentCount > 0,
"If the wait was successful, there should be count available.");
if (m_currentCount > 0)
{
waitSuccessful = true;
m_currentCount--;
}
else if (oce is not null)
{
throw oce;
if (oce is not null)
{
throw oce;
}
if (timedOut)
{
break;
}
}

// Exposing wait handle which is lazily initialized if needed
Expand Down Expand Up @@ -678,12 +703,37 @@ private Task<bool> WaitAsyncCore(long millisecondsTimeout, CancellationToken can
if (cancellationToken.IsCancellationRequested)
return Task.FromCanceled<bool>(cancellationToken);

// Fast path: try a lock-free acquire; falls through to the lock if it fails.
// Skipped when m_waitHandle is non-null to keep its state consistent under the lock.
if (m_waitHandle is null)
{
int current = m_currentCount;
// Best-effort waiter checks (m_asyncHead and m_waitCount are volatile, so plain reads
// are acquire-ordered): they may be updated after this read, but the CAS will fail if
// m_currentCount was concurrently decremented.
if (current > 0
&& m_asyncHead is null
&& m_waitCount == 0
&& Interlocked.CompareExchange(ref m_currentCount, current - 1, current) == current)
Comment thread
thomhurst marked this conversation as resolved.
{
// Handle the rare race where AvailableWaitHandle was initialized concurrently.
if (current == 1 && m_waitHandle is not null)
{
lock (m_lockObjAndDisposed)
{
if (m_waitHandle is not null && m_currentCount == 0)
m_waitHandle.Reset();
}
}
return Task.FromResult(true);
Comment thread
thomhurst marked this conversation as resolved.
}
}
Comment thread
thomhurst marked this conversation as resolved.

lock (m_lockObjAndDisposed)
{
// If there are counts available, allow this waiter to succeed.
if (m_currentCount > 0)
if (TryDecrementCount() > 0)
{
--m_currentCount;
if (m_waitHandle is not null && m_currentCount == 0) m_waitHandle.Reset();
return Task.FromResult(true);
}
Expand Down Expand Up @@ -759,6 +809,21 @@ private bool RemoveAsyncWaiter(TaskNode task)
return wasInList;
}

/// <summary>
/// Takes one permit from <see cref="m_currentCount"/> when the count is positive. The update is a
/// compare-and-swap retry loop instead of a plain decrement, since the lock-free fast path in
/// <see cref="WaitAsyncCore"/> may lower <see cref="m_currentCount"/> at the same time without taking the lock.
/// </summary>
/// <returns>
/// The count observed immediately before the decrement. A result that is not positive means no permit
/// was available and the count was left untouched.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private int TryDecrementCount()
{
    while (true)
    {
        // Fresh read on every attempt so a lost race is retried against the latest value.
        int snapshot = m_currentCount;
        if (snapshot <= 0)
        {
            // Nothing to take; report what we saw without modifying the count.
            return snapshot;
        }

        // The swap only lands if nobody changed the count since the snapshot was taken.
        if (Interlocked.CompareExchange(ref m_currentCount, snapshot - 1, snapshot) == snapshot)
        {
            return snapshot;
        }
    }
}

/// <summary>Performs the asynchronous wait.</summary>
/// <param name="asyncWaiter">The asynchronous waiter.</param>
/// <param name="millisecondsTimeout">The timeout.</param>
Expand Down Expand Up @@ -842,21 +907,36 @@ public int Release(int releaseCount)

lock (m_lockObjAndDisposed)
{
// Read the m_currentCount into a local variable to avoid unnecessary volatile accesses inside the lock.
int currentCount = m_currentCount;
returnCount = currentCount;

// If the release count would result exceeding the maximum count, throw SemaphoreFullException.
if (m_maxCount - currentCount < releaseCount)
// Snapshot the live count. A lock-free WaitAsync fast path can decrement m_currentCount
// concurrently (it bypasses this lock); nothing increments it concurrently (every increment
// path holds this lock). So the real count can only be <= this snapshot until we update it.
int observed = m_currentCount;

// Validate against m_maxCount. Re-read on a mismatch so a racing fast-path decrement (which
// only lowers the real count) can't make us throw SemaphoreFullException spuriously off a
// stale, too-high snapshot. Because only decrements race, once observed + releaseCount <=
// m_maxCount holds for a snapshot >= the real count, the real count + releaseCount can't
// exceed m_maxCount either, so the bound we enforce on the atomic add below still holds.
while (m_maxCount - observed < releaseCount)
{
throw new SemaphoreFullException();
int reread = m_currentCount;
if (reread == observed)
{
throw new SemaphoreFullException();
}
observed = reread;
}
returnCount = observed;

// Increment the count by the actual release count
currentCount += releaseCount;
// Compute the post-release count in a LOCAL only. We must never store this inflated value into
// m_currentCount: it includes permits earmarked for the waiters released below, and the
// lock-free fast path (which reads m_currentCount without the lock) would observe and steal
// them in the window before we corrected the count. Instead we apply only the net delta once,
// atomically, at the end. Whenever waiters are present the count is 0 and no fast path can be
// racing (it requires count > 0 and no waiters), so this snapshot is stable here.
int currentCount = observed + releaseCount;

// Signal to any synchronous waiters, taking into account how many waiters have previously been pulsed to wake
// but have not yet woken
// Signal synchronous waiters, accounting for those already pulsed but not yet woken.
int waitCount = m_waitCount;
Debug.Assert(m_countOfWaitersPulsedToWake <= waitCount);
int waitersToNotify = Math.Min(currentCount, waitCount) - m_countOfWaitersPulsedToWake;
Expand Down Expand Up @@ -884,31 +964,38 @@ public int Release(int releaseCount)
// asynchronous waiters, we assume that all synchronous waiters will eventually
// acquire the semaphore. That could be a faulty assumption if those synchronous
// waits are canceled, but the wait code path will handle that.
// Permits handed to async waiters go straight to their tasks rather than into
// m_currentCount, so they're excluded from the net delta applied below.
int asyncReleased = 0;
if (m_asyncHead is not null)
{
Debug.Assert(m_asyncTail is not null, "tail should not be null if head isn't null");
int maxAsyncToRelease = currentCount - waitCount;
while (maxAsyncToRelease > 0 && m_asyncHead is not null)
while (asyncReleased < maxAsyncToRelease && m_asyncHead is not null)
{
--currentCount;
--maxAsyncToRelease;
++asyncReleased;

// Get the next async waiter to release and queue it to be completed
TaskNode waiterTask = m_asyncHead;
RemoveAsyncWaiter(waiterTask); // ensures waiterTask.Next/Prev are null
waiterTask.TrySetResult(result: true);
}
currentCount -= asyncReleased;
}
m_currentCount = currentCount;

// Exposing wait handle if it is not null
if (m_waitHandle is not null && returnCount == 0 && currentCount > 0)
// Apply the net change (permits released minus those handed straight to async waiters) in a
// single atomic add. A relative add (not an absolute store) folds in any fast-path decrements
// that raced since we snapshotted, and we never publish a count above the number of genuinely
// free permits, so the fast path can never observe a permit reserved for a waiter. The
// pre-validated snapshot bounds the result at or below m_maxCount.
int delta = releaseCount - asyncReleased;
int newCount = delta != 0 ? Interlocked.Add(ref m_currentCount, delta) : observed;

if (m_waitHandle is not null && observed == 0 && newCount > 0)
{
m_waitHandle.Set();
}
}

// And return the count
return returnCount;
}

Expand Down
Loading
Loading