Skip to content

Commit 9501cce

Browse files
VSadov and jkotas authored
Avoid long busy-waiting between hijack retries. (#103212)
* refactor suspension sleep into minipal_microsleep, avoid long busy-waiting
* rename
* Apply suggestions from code review

Co-authored-by: Jan Kotas <jkotas@microsoft.com>

---------

Co-authored-by: Jan Kotas <jkotas@microsoft.com>
1 parent a900bbf commit 9501cce

File tree

11 files changed

+247
-95
lines changed

11 files changed

+247
-95
lines changed

src/coreclr/minipal/Unix/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
set(SOURCES
22
doublemapping.cpp
33
dn-u16.cpp
4+
${CLR_SRC_NATIVE_DIR}/minipal/time.c
45
)
56

67
if(NOT CLR_CROSS_COMPONENTS_BUILD)

src/coreclr/minipal/Windows/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ set(SOURCES
22
doublemapping.cpp
33
dn-u16.cpp
44
${CLR_SRC_NATIVE_DIR}/minipal/utf8.c
5+
${CLR_SRC_NATIVE_DIR}/minipal/time.c
56
)
67

78
if(NOT CLR_CROSS_COMPONENTS_BUILD)

src/coreclr/nativeaot/Runtime/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ set(COMMON_RUNTIME_SOURCES
5151
${GC_DIR}/softwarewritewatch.cpp
5252

5353
${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c
54+
${CLR_SRC_NATIVE_DIR}/minipal/time.c
5455
)
5556

5657
set(SERVER_GC_SOURCES

src/coreclr/nativeaot/Runtime/threadstore.cpp

Lines changed: 26 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "RuntimeInstance.h"
2323
#include "TargetPtrs.h"
2424
#include "yieldprocessornormalized.h"
25+
#include <minipal/time.h>
2526

2627
#include "slist.inl"
2728

@@ -224,30 +225,6 @@ void ThreadStore::UnlockThreadStore()
224225
m_Lock.Leave();
225226
}
226227

227-
// exponential spinwait with an approximate time limit for waiting in microsecond range.
228-
// when iteration == -1, only usecLimit is used
229-
void SpinWait(int iteration, int usecLimit)
230-
{
231-
int64_t startTicks = PalQueryPerformanceCounter();
232-
int64_t ticksPerSecond = PalQueryPerformanceFrequency();
233-
int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000;
234-
235-
int l = iteration >= 0 ? min(iteration, 30): 30;
236-
for (int i = 0; i < l; i++)
237-
{
238-
for (int j = 0; j < (1 << i); j++)
239-
{
240-
System_YieldProcessor();
241-
}
242-
243-
int64_t currentTicks = PalQueryPerformanceCounter();
244-
if (currentTicks > endTicks)
245-
{
246-
break;
247-
}
248-
}
249-
}
250-
251228
void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
252229
{
253230
Thread * pThisThread = GetCurrentThreadIfAvailable();
@@ -265,16 +242,14 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
265242
// reason for this is that we essentially implement Dekker's algorithm, which requires write ordering.
266243
PalFlushProcessWriteBuffers();
267244

268-
int retries = 0;
269-
int prevRemaining = 0;
270-
int remaining = 0;
271-
bool observeOnly = false;
245+
int prevRemaining = INT32_MAX;
246+
bool observeOnly = true;
247+
uint32_t rehijackDelay = 8;
248+
uint32_t usecsSinceYield = 0;
272249

273250
while(true)
274251
{
275-
prevRemaining = remaining;
276-
remaining = 0;
277-
252+
int remaining = 0;
278253
FOREACH_THREAD(pTargetThread)
279254
{
280255
if (pTargetThread == pThisThread)
@@ -293,30 +268,42 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
293268
}
294269
END_FOREACH_THREAD
295270

296-
if (!remaining)
271+
if (remaining == 0)
297272
break;
298273

299274
// if we see progress or have just done a hijacking pass
300275
// do not hijack in the next iteration
301276
if (remaining < prevRemaining || !observeOnly)
302277
{
303278
// 5 usec delay, then check for more progress
304-
SpinWait(-1, 5);
279+
minipal_microdelay(5, &usecsSinceYield);
305280
observeOnly = true;
306281
}
307282
else
308283
{
309-
SpinWait(retries++, 100);
284+
minipal_microdelay(rehijackDelay, &usecsSinceYield);
310285
observeOnly = false;
311286

312-
// make sure our spining is not starving other threads, but not too often,
313-
// this can cause a 1-15 msec delay, depending on OS, and that is a lot while
314-
// very rarely needed, since threads are supposed to be releasing their CPUs
315-
if ((retries & 127) == 0)
287+
// double up rehijack delay in case we are rehjacking too often
288+
// up to 100 usec, as that should be enough to make progress.
289+
if (rehijackDelay < 100)
316290
{
317-
PalSwitchToThread();
291+
rehijackDelay *= 2;
318292
}
319293
}
294+
295+
prevRemaining = remaining;
296+
297+
// If we see 1 msec of uninterrupted wait, it is a concern.
298+
// Since we are stopping threads, there should be free cores to run on. Perhaps
299+
// some thread that we need to stop needs to run on the same core as ours.
300+
// Let's yield the timeslice to make sure such threads can run.
301+
// We will not do this often though, since this can introduce arbitrary delays.
302+
if (usecsSinceYield > 1000)
303+
{
304+
PalSwitchToThread();
305+
usecsSinceYield = 0;
306+
}
320307
}
321308

322309
#if defined(TARGET_ARM) || defined(TARGET_ARM64)

src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1082,7 +1082,6 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* p
10821082
// stack overflow too. Those are held in the sigsegv_handler with blocked signals until
10831083
// the process exits.
10841084
// ESRCH may happen on some OSes when the thread is exiting.
1085-
// The thread should leave cooperative mode, but we could have seen it in its earlier state.
10861085
if ((status == EAGAIN)
10871086
|| (status == ESRCH)
10881087
#ifdef __APPLE__

src/coreclr/pal/src/exception/signal.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -935,7 +935,13 @@ PAL_ERROR InjectActivationInternal(CorUnix::CPalThread* pThread)
935935
}
936936
#endif
937937

938-
if ((status != 0) && (status != EAGAIN))
938+
// ESRCH may happen on some OSes when the thread is exiting.
939+
if (status == EAGAIN || status == ESRCH)
940+
{
941+
return ERROR_CANCELLED;
942+
}
943+
944+
if (status != 0)
939945
{
940946
// Failure to send the signal is fatal. There are only two cases when sending
941947
// the signal can fail. First, if the signal ID is invalid and second,

src/coreclr/vm/threadsuspend.cpp

Lines changed: 35 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "finalizerthread.h"
1616
#include "dbginterface.h"
17+
#include <minipal/time.h>
1718

1819
#define HIJACK_NONINTERRUPTIBLE_THREADS
1920

@@ -2188,6 +2189,8 @@ void Thread::RareDisablePreemptiveGC()
21882189
#if defined(FEATURE_HIJACK) && !defined(TARGET_UNIX)
21892190
ResetThreadState(Thread::TS_GCSuspendRedirected);
21902191
#endif
2192+
// make sure this is cleared - in case a signal is lost or somehow we did not act on it
2193+
m_hasPendingActivation = false;
21912194

21922195
DWORD status = GCHeapUtilities::GetGCHeap()->WaitUntilGCComplete();
21932196
if (status != S_OK)
@@ -3207,44 +3210,6 @@ COR_PRF_SUSPEND_REASON GCSuspendReasonToProfSuspendReason(ThreadSuspend::SUSPEND
32073210
}
32083211
#endif // PROFILING_SUPPORTED
32093212

3210-
static int64_t QueryPerformanceCounter()
3211-
{
3212-
LARGE_INTEGER ts;
3213-
QueryPerformanceCounter(&ts);
3214-
return ts.QuadPart;
3215-
}
3216-
3217-
static int64_t QueryPerformanceFrequency()
3218-
{
3219-
LARGE_INTEGER ts;
3220-
QueryPerformanceFrequency(&ts);
3221-
return ts.QuadPart;
3222-
}
3223-
3224-
// exponential spinwait with an approximate time limit for waiting in microsecond range.
3225-
// when iteration == -1, only usecLimit is used
3226-
void SpinWait(int iteration, int usecLimit)
3227-
{
3228-
int64_t startTicks = QueryPerformanceCounter();
3229-
int64_t ticksPerSecond = QueryPerformanceFrequency();
3230-
int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000;
3231-
3232-
int l = iteration >= 0 ? min(iteration, 30): 30;
3233-
for (int i = 0; i < l; i++)
3234-
{
3235-
for (int j = 0; j < (1 << i); j++)
3236-
{
3237-
System_YieldProcessor();
3238-
}
3239-
3240-
int64_t currentTicks = QueryPerformanceCounter();
3241-
if (currentTicks > endTicks)
3242-
{
3243-
break;
3244-
}
3245-
}
3246-
}
3247-
32483213
//************************************************************************************
32493214
//
32503215
// SuspendRuntime is responsible for ensuring that all managed threads reach a
@@ -3335,16 +3300,14 @@ void ThreadSuspend::SuspendAllThreads()
33353300
// See VSW 475315 and 488918 for details.
33363301
::FlushProcessWriteBuffers();
33373302

3338-
int retries = 0;
3339-
int prevRemaining = 0;
3340-
int remaining = 0;
3341-
bool observeOnly = false;
3303+
int prevRemaining = INT32_MAX;
3304+
bool observeOnly = true;
3305+
uint32_t rehijackDelay = 8;
3306+
uint32_t usecsSinceYield = 0;
33423307

33433308
while(true)
33443309
{
3345-
prevRemaining = remaining;
3346-
remaining = 0;
3347-
3310+
int remaining = 0;
33483311
Thread* pTargetThread = NULL;
33493312
while ((pTargetThread = ThreadStore::GetThreadList(pTargetThread)) != NULL)
33503313
{
@@ -3361,30 +3324,42 @@ void ThreadSuspend::SuspendAllThreads()
33613324
}
33623325
}
33633326

3364-
if (!remaining)
3327+
if (remaining == 0)
33653328
break;
33663329

33673330
// if we see progress or have just done a hijacking pass
33683331
// do not hijack in the next iteration
33693332
if (remaining < prevRemaining || !observeOnly)
33703333
{
33713334
// 5 usec delay, then check for more progress
3372-
SpinWait(-1, 5);
3335+
minipal_microdelay(5, &usecsSinceYield);
33733336
observeOnly = true;
33743337
}
33753338
else
33763339
{
3377-
SpinWait(retries++, 100);
3340+
minipal_microdelay(rehijackDelay, &usecsSinceYield);
33783341
observeOnly = false;
33793342

3380-
// make sure our spining is not starving other threads, but not too often,
3381-
// this can cause a 1-15 msec delay, depending on OS, and that is a lot while
3382-
// very rarely needed, since threads are supposed to be releasing their CPUs
3383-
if ((retries & 127) == 0)
3343+
// double up rehijack delay in case we are rehjacking too often
3344+
// up to 100 usec, as that should be enough to make progress.
3345+
if (rehijackDelay < 100)
33843346
{
3385-
SwitchToThread();
3347+
rehijackDelay *= 2;
33863348
}
33873349
}
3350+
3351+
prevRemaining = remaining;
3352+
3353+
// If we see 1 msec of uninterrupted wait, it is a concern.
3354+
// Since we are stopping threads, there should be free cores to run on. Perhaps
3355+
// some thread that we need to stop needs to run on the same core as ours.
3356+
// Let's yield the timeslice to make sure such threads can run.
3357+
// We will not do this often though, since this can introduce arbitrary delays.
3358+
if (usecsSinceYield > 1000)
3359+
{
3360+
SwitchToThread();
3361+
usecsSinceYield = 0;
3362+
}
33883363
}
33893364

33903365
#if defined(TARGET_ARM) || defined(TARGET_ARM64)
@@ -5937,7 +5912,13 @@ bool Thread::InjectActivation(ActivationReason reason)
59375912
if (hThread != INVALID_HANDLE_VALUE)
59385913
{
59395914
m_hasPendingActivation = true;
5940-
return ::PAL_InjectActivation(hThread);
5915+
BOOL success = ::PAL_InjectActivation(hThread);
5916+
if (!success)
5917+
{
5918+
m_hasPendingActivation = false;
5919+
}
5920+
5921+
return success;
59415922
}
59425923
}
59435924

src/native/minipal/configure.cmake

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,9 @@ check_function_exists(sysctlbyname HAVE_SYSCTLBYNAME)
88
check_symbol_exists(arc4random_buf "stdlib.h" HAVE_ARC4RANDOM_BUF)
99
check_symbol_exists(O_CLOEXEC fcntl.h HAVE_O_CLOEXEC)
1010

11+
check_symbol_exists(
12+
clock_gettime_nsec_np
13+
time.h
14+
HAVE_CLOCK_GETTIME_NSEC_NP)
15+
1116
configure_file(${CMAKE_CURRENT_LIST_DIR}/minipalconfig.h.in ${CMAKE_CURRENT_BINARY_DIR}/minipalconfig.h)

src/native/minipal/minipalconfig.h.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@
55
#cmakedefine01 HAVE_AUXV_HWCAP_H
66
#cmakedefine01 HAVE_O_CLOEXEC
77
#cmakedefine01 HAVE_SYSCTLBYNAME
8+
#cmakedefine01 HAVE_CLOCK_GETTIME_NSEC_NP
89

910
#endif

0 commit comments

Comments
 (0)