Skip to content

Commit 8d796d8

Browse files
janvorlijkotasCopilot
authored
Redo fix lookup for current Thread in async signal handler (#122513)
This is a new version of the fix. The code in HijackCallback was using the fact whether the pThreadToHijack is NULL or not as an indicator of whether it should suspend inline or redirect the thread. So passing in the pThreadToHijack without other changes caused it to never suspend inline on Unix and it was causing hangs in System.Collections.Concurrent tests. The current CheckActivationSafePoint uses thread local storage to get the current Thread instance. But this function is called from async signal handler (the activation signal handler) and it is not allowed to access TLS variables there because the access can allocate and if the interrupted code was running in an allocation code, it could crash. There was no problem with this since .NET 1.0, but a change in the recent glibc version has broken this. We've got reports of crashes in this code due to the reason mentioned above. This change introduces an async safe mechanism for accessing the current Thread instance from async signal handlers. It uses a segmented array that can grow, but never shrink. Entries for threads are added when runtime creates a thread / attaches to an external thread and removed when the thread dies. The check for safety of the activation injection was further enhanced to make sure that the ScanReaderLock is not taken. In cases it would need to be taken, we just reject the location. Since NativeAOT is subject to the same issue, the code to maintain the thread id to thread instance map is placed to the minipal and shared between coreclr and NativeAOT. Closes #121581 --------- Co-authored-by: Jan Kotas <[email protected]> Co-authored-by: Copilot <[email protected]>
1 parent 05b94f1 commit 8d796d8

File tree

17 files changed

+337
-63
lines changed

17 files changed

+337
-63
lines changed

src/coreclr/nativeaot/Runtime/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ set(COMMON_RUNTIME_SOURCES
5555
${CLR_SRC_NATIVE_DIR}/minipal/xoshiro128pp.c
5656
)
5757

58+
if (CLR_CMAKE_TARGET_UNIX AND NOT CLR_CMAKE_TARGET_ARCH_WASM)
59+
list(APPEND COMMON_RUNTIME_SOURCES
60+
${RUNTIME_DIR}/asyncsafethreadmap.cpp
61+
)
62+
endif()
63+
5864
set(SERVER_GC_SOURCES
5965
${GC_DIR}/gceesvr.cpp
6066
${GC_DIR}/gcsvr.cpp

src/coreclr/nativeaot/Runtime/thread.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,7 @@ void Thread::Hijack()
621621
PalHijack(this);
622622
}
623623

624-
void Thread::HijackCallback(NATIVE_CONTEXT* pThreadContext, Thread* pThreadToHijack)
624+
void Thread::HijackCallback(NATIVE_CONTEXT* pThreadContext, Thread* pThreadToHijack, bool doInlineSuspend)
625625
{
626626
// If we are no longer trying to suspend, no need to do anything.
627627
// This is just an optimization. It is ok to race with the setting the trap flag here.
@@ -690,9 +690,8 @@ void Thread::HijackCallback(NATIVE_CONTEXT* pThreadContext, Thread* pThreadToHij
690690
ASSERT(codeManager->IsUnwindable(pvAddress) || runtime->IsConservativeStackReportingEnabled());
691691
#endif
692692

693-
// if we are not given a thread to hijack
694693
// perform in-line wait on the current thread
695-
if (pThreadToHijack == NULL)
694+
if (doInlineSuspend)
696695
{
697696
ASSERT(pThread->m_interruptedContext == NULL);
698697
pThread->InlineSuspend(pThreadContext);

src/coreclr/nativeaot/Runtime/thread.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ class Thread : private RuntimeThreadLocals
280280
void* GetHijackedReturnAddress();
281281
static bool IsHijackTarget(void * address);
282282

283-
static void HijackCallback(NATIVE_CONTEXT* pThreadContext, Thread* pThreadToHijack);
283+
static void HijackCallback(NATIVE_CONTEXT* pThreadContext, Thread* pThreadToHijack, bool doInlineSuspend);
284284
#else // FEATURE_HIJACK
285285
void Unhijack() { }
286286
bool IsHijacked() { return false; }

src/coreclr/nativeaot/Runtime/threadstore.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include "TargetPtrs.h"
2323
#include "yieldprocessornormalized.h"
2424
#include <minipal/time.h>
25+
#include <minipal/thread.h>
26+
#include "asyncsafethreadmap.h"
2527

2628
#include "slist.inl"
2729

@@ -143,6 +145,14 @@ void ThreadStore::AttachCurrentThread(bool fAcquireThreadStoreLock)
143145
pAttachingThread->m_ThreadStateFlags = Thread::TSF_Attached;
144146

145147
pTS->m_ThreadList.PushHead(pAttachingThread);
148+
149+
#if defined(TARGET_UNIX) && !defined(TARGET_WASM)
150+
if (!InsertThreadIntoAsyncSafeMap(pAttachingThread->m_threadId, pAttachingThread))
151+
{
152+
PalPrintFatalError("\nFailed to insert thread into async-safe map due to out of memory.\n");
153+
RhFailFast();
154+
}
155+
#endif // TARGET_UNIX && !TARGET_WASM
146156
}
147157

148158
// static
@@ -188,6 +198,9 @@ void ThreadStore::DetachCurrentThread()
188198
pTS->m_ThreadList.RemoveFirst(pDetachingThread);
189199
// tidy up GC related stuff (release allocation context, etc..)
190200
pDetachingThread->Detach();
201+
#if defined(TARGET_UNIX) && !defined(TARGET_WASM)
202+
RemoveThreadFromAsyncSafeMap(pDetachingThread->m_threadId, pDetachingThread);
203+
#endif
191204
}
192205

193206
// post-mortem clean up.
@@ -352,6 +365,13 @@ EXTERN_C RuntimeThreadLocals* RhpGetThread()
352365
return &tls_CurrentThread;
353366
}
354367

368+
#if defined(TARGET_UNIX) && !defined(TARGET_WASM)
369+
Thread * ThreadStore::GetCurrentThreadIfAvailableAsyncSafe()
370+
{
371+
return (Thread*)FindThreadInAsyncSafeMap(minipal_get_current_thread_id_no_cache());
372+
}
373+
#endif // TARGET_UNIX && !TARGET_WASM
374+
355375
#endif // !DACCESS_COMPILE
356376

357377
#ifdef _WIN32

src/coreclr/nativeaot/Runtime/threadstore.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ class ThreadStore
4747
static Thread * RawGetCurrentThread();
4848
static Thread * GetCurrentThread();
4949
static Thread * GetCurrentThreadIfAvailable();
50+
#if defined(TARGET_UNIX) && !defined(TARGET_WASM)
51+
static Thread * GetCurrentThreadIfAvailableAsyncSafe();
52+
#endif
5053
static PTR_Thread GetSuspendingThread();
5154
static void AttachCurrentThread();
5255
static void AttachCurrentThread(bool fAcquireThreadStoreLock);

src/coreclr/nativeaot/Runtime/unix/PalUnix.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,24 +1017,24 @@ static struct sigaction g_previousActivationHandler;
10171017

10181018
static void ActivationHandler(int code, siginfo_t* siginfo, void* context)
10191019
{
1020-
// Only accept activations from the current process
1021-
if (siginfo->si_pid == getpid()
1020+
Thread* pThread = ThreadStore::GetCurrentThreadIfAvailableAsyncSafe();
1021+
if (pThread)
1022+
{
1023+
// Only accept activations from the current process
1024+
if (siginfo->si_pid == getpid()
10221025
#ifdef HOST_APPLE
1023-
// On Apple platforms si_pid is sometimes 0. It was confirmed by Apple to be expected, as the si_pid is tracked at the process level. So when multiple
1024-
// signals are in flight in the same process at the same time, it may be overwritten / zeroed.
1025-
|| siginfo->si_pid == 0
1026+
// On Apple platforms si_pid is sometimes 0. It was confirmed by Apple to be expected, as the si_pid is tracked at the process level. So when multiple
1027+
// signals are in flight in the same process at the same time, it may be overwritten / zeroed.
1028+
|| siginfo->si_pid == 0
10261029
#endif
1027-
)
1028-
{
1029-
// Make sure that errno is not modified
1030-
int savedErrNo = errno;
1031-
Thread::HijackCallback((NATIVE_CONTEXT*)context, NULL);
1032-
errno = savedErrNo;
1033-
}
1030+
)
1031+
{
1032+
// Make sure that errno is not modified
1033+
int savedErrNo = errno;
1034+
Thread::HijackCallback((NATIVE_CONTEXT*)context, pThread, true /* doInlineSuspend */);
1035+
errno = savedErrNo;
1036+
}
10341037

1035-
Thread* pThread = ThreadStore::GetCurrentThreadIfAvailable();
1036-
if (pThread)
1037-
{
10381038
pThread->SetActivationPending(false);
10391039
}
10401040

src/coreclr/nativeaot/Runtime/windows/PalMinWin.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -673,9 +673,9 @@ static void* g_returnAddressHijackTarget = NULL;
673673
static void NTAPI ActivationHandler(ULONG_PTR parameter)
674674
{
675675
CLONE_APC_CALLBACK_DATA* data = (CLONE_APC_CALLBACK_DATA*)parameter;
676-
Thread::HijackCallback((NATIVE_CONTEXT*)data->ContextRecord, NULL);
677-
678676
Thread* pThread = (Thread*)data->Parameter;
677+
Thread::HijackCallback((NATIVE_CONTEXT*)data->ContextRecord, pThread, true /* doInlineSuspend */);
678+
679679
pThread->SetActivationPending(false);
680680
}
681681

@@ -833,7 +833,7 @@ void PalHijack(Thread* pThreadToHijack)
833833

834834
if (isSafeToRedirect)
835835
{
836-
Thread::HijackCallback((NATIVE_CONTEXT*)&win32ctx, pThreadToHijack);
836+
Thread::HijackCallback((NATIVE_CONTEXT*)&win32ctx, pThreadToHijack, false /* doInlineSuspend */);
837837
}
838838
}
839839

src/coreclr/pal/src/exception/signal.cpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -936,22 +936,20 @@ static void inject_activation_handler(int code, siginfo_t *siginfo, void *contex
936936
CONTEXTToNativeContext(&winContext, ucontext);
937937
}
938938
}
939+
940+
// Call the original handler when it is not ignored or default (terminate).
941+
if (g_previous_activation.sa_flags & SA_SIGINFO)
942+
{
943+
_ASSERTE(g_previous_activation.sa_sigaction != NULL);
944+
g_previous_activation.sa_sigaction(code, siginfo, context);
945+
}
939946
else
940947
{
941-
// Call the original handler when it is not ignored or default (terminate).
942-
if (g_previous_activation.sa_flags & SA_SIGINFO)
943-
{
944-
_ASSERTE(g_previous_activation.sa_sigaction != NULL);
945-
g_previous_activation.sa_sigaction(code, siginfo, context);
946-
}
947-
else
948+
if (g_previous_activation.sa_handler != SIG_IGN &&
949+
g_previous_activation.sa_handler != SIG_DFL)
948950
{
949-
if (g_previous_activation.sa_handler != SIG_IGN &&
950-
g_previous_activation.sa_handler != SIG_DFL)
951-
{
952-
_ASSERTE(g_previous_activation.sa_handler != NULL);
953-
g_previous_activation.sa_handler(code);
954-
}
951+
_ASSERTE(g_previous_activation.sa_handler != NULL);
952+
g_previous_activation.sa_handler(code);
955953
}
956954
}
957955
}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#include "common.h"
5+
6+
#include "asyncsafethreadmap.h"
7+
8+
// Async safe lock free thread map for use in signal handlers
9+
10+
struct ThreadEntry
11+
{
12+
size_t osThread;
13+
void* pThread;
14+
};
15+
16+
#define MAX_THREADS_IN_SEGMENT 256
17+
18+
struct ThreadSegment
19+
{
20+
ThreadEntry entries[MAX_THREADS_IN_SEGMENT];
21+
ThreadSegment* pNext;
22+
};
23+
24+
static ThreadSegment *s_pAsyncSafeThreadMapHead = NULL;
25+
26+
bool InsertThreadIntoAsyncSafeMap(size_t osThread, void* pThread)
27+
{
28+
size_t startIndex = osThread % MAX_THREADS_IN_SEGMENT;
29+
30+
ThreadSegment* pSegment = s_pAsyncSafeThreadMapHead;
31+
ThreadSegment** ppSegment = &s_pAsyncSafeThreadMapHead;
32+
while (true)
33+
{
34+
if (pSegment == NULL)
35+
{
36+
// Need to add a new segment
37+
ThreadSegment* pNewSegment = new (nothrow) ThreadSegment();
38+
if (pNewSegment == NULL)
39+
{
40+
// Memory allocation failed
41+
return false;
42+
}
43+
44+
memset(pNewSegment, 0, sizeof(ThreadSegment));
45+
ThreadSegment* pExpected = NULL;
46+
if (!__atomic_compare_exchange_n(
47+
ppSegment,
48+
&pExpected,
49+
pNewSegment,
50+
false /* weak */,
51+
__ATOMIC_RELEASE /* success_memorder */,
52+
__ATOMIC_RELAXED /* failure_memorder */))
53+
{
54+
// Another thread added the segment first
55+
delete pNewSegment;
56+
pNewSegment = pExpected;
57+
}
58+
59+
pSegment = pNewSegment;
60+
}
61+
for (size_t i = 0; i < MAX_THREADS_IN_SEGMENT; i++)
62+
{
63+
size_t index = (startIndex + i) % MAX_THREADS_IN_SEGMENT;
64+
65+
size_t expected = 0;
66+
if (__atomic_compare_exchange_n(
67+
&pSegment->entries[index].osThread,
68+
&expected,
69+
osThread,
70+
false /* weak */,
71+
__ATOMIC_RELEASE /* success_memorder */,
72+
__ATOMIC_RELAXED /* failure_memorder */))
73+
{
74+
// Successfully inserted
75+
// Use atomic store with release to ensure proper ordering
76+
__atomic_store_n(&pSegment->entries[index].pThread, pThread, __ATOMIC_RELEASE);
77+
return true;
78+
}
79+
}
80+
81+
ppSegment = &pSegment->pNext;
82+
pSegment = __atomic_load_n(&pSegment->pNext, __ATOMIC_ACQUIRE);
83+
}
84+
}
85+
86+
void RemoveThreadFromAsyncSafeMap(size_t osThread, void* pThread)
87+
{
88+
size_t startIndex = osThread % MAX_THREADS_IN_SEGMENT;
89+
90+
ThreadSegment* pSegment = s_pAsyncSafeThreadMapHead;
91+
while (pSegment)
92+
{
93+
for (size_t i = 0; i < MAX_THREADS_IN_SEGMENT; i++)
94+
{
95+
size_t index = (startIndex + i) % MAX_THREADS_IN_SEGMENT;
96+
if (pSegment->entries[index].pThread == pThread)
97+
{
98+
// Found the entry, remove it
99+
pSegment->entries[index].pThread = NULL;
100+
__atomic_exchange_n(&pSegment->entries[index].osThread, (size_t)0, __ATOMIC_RELEASE);
101+
return;
102+
}
103+
}
104+
pSegment = __atomic_load_n(&pSegment->pNext, __ATOMIC_ACQUIRE);
105+
}
106+
}
107+
108+
void *FindThreadInAsyncSafeMap(size_t osThread)
109+
{
110+
size_t startIndex = osThread % MAX_THREADS_IN_SEGMENT;
111+
ThreadSegment* pSegment = s_pAsyncSafeThreadMapHead;
112+
while (pSegment)
113+
{
114+
for (size_t i = 0; i < MAX_THREADS_IN_SEGMENT; i++)
115+
{
116+
size_t index = (startIndex + i) % MAX_THREADS_IN_SEGMENT;
117+
// Use acquire to synchronize with release in InsertThreadIntoAsyncSafeMap
118+
if (__atomic_load_n(&pSegment->entries[index].osThread, __ATOMIC_ACQUIRE) == osThread)
119+
{
120+
return pSegment->entries[index].pThread;
121+
}
122+
}
123+
pSegment = __atomic_load_n(&pSegment->pNext, __ATOMIC_ACQUIRE);
124+
}
125+
return NULL;
126+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#ifndef __ASYNCSAFETHREADMAP_H__
5+
#define __ASYNCSAFETHREADMAP_H__
6+
7+
#if defined(TARGET_UNIX) && !defined(TARGET_WASM)
8+
9+
// Insert a thread into the async-safe map.
10+
// * osThread - The OS thread ID to insert.
11+
// * pThread - A pointer to the thread object to associate with the OS thread ID.
12+
// * return true if the insertion was successful, false otherwise (OOM).
13+
bool InsertThreadIntoAsyncSafeMap(size_t osThread, void* pThread);
14+
15+
// Remove a thread from the async-safe map.
16+
// * osThread - The OS thread ID to remove.
17+
// * pThread - A pointer to the thread object associated with the OS thread ID.
18+
void RemoveThreadFromAsyncSafeMap(size_t osThread, void* pThread);
19+
20+
// Find a thread in the async-safe map.
21+
// * osThread - The OS thread ID to search for.
22+
// * return - A pointer to the thread object associated with the OS thread ID, or NULL if not found.
23+
void* FindThreadInAsyncSafeMap(size_t osThread);
24+
25+
#endif // TARGET_UNIX && !TARGET_WASM
26+
27+
#endif // __ASYNCSAFETHREADMAP_H__

0 commit comments

Comments
 (0)