Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/coreclr/inc/corinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,33 @@ struct CORINFO_THREAD_STATIC_INFO_NATIVEAOT
CORINFO_CONST_LOOKUP tlsGetAddrFtnPtr;
};

//----------------------------------------------------------------------------
// getObjectAllocContextInfo and CORINFO_OBJECT_ALLOC_CONTEXT_INFO: The EE instructs the JIT
// about how to access the thread-local allocation context for inline object allocation.

struct CORINFO_OBJECT_ALLOC_CONTEXT_INFO
{
// Whether inline allocation is supported for this runtime configuration.
// False when: GCStress enabled, allocation tracking/sampling active,
// non-thread-local allocation contexts, etc.
// The JIT asserts that this is true before emitting an inline allocation sequence,
// so callers must check it before requesting the expansion.
bool supported;

// Offsets within the ee_alloc_context structure.
// The ARM64 codegen applies these as load/store displacements from the address it
// derives from the TLS fields below.
uint32_t allocPtrFieldOffset; // Offset of alloc_ptr (current bump pointer)
uint32_t combinedLimitFieldOffset; // Offset of combined_limit (end of the usable allocation window)

// MethodTable layout offset
uint32_t methodTableBaseSizeOffset; // Offset of m_BaseSize in MethodTable; read as a 32-bit value by the ARM64 codegen

// TLS access info (platform-specific). Which fields are meaningful depends on the target
// OS/architecture, as noted per field.
// NOTE(review): the comment on tlsIndex says "address of _tls_index", but the ARM64 codegen
// materializes tlsIndex.addr as an immediate and uses it directly as the index into the TLS
// array (scaled by 8), i.e. as the index value rather than an address to load from.
// Confirm which is intended and align the comment or the codegen.
CORINFO_CONST_LOOKUP tlsIndex; // Windows: address of _tls_index (IAT_VALUE)
uint32_t offsetOfThreadLocalStoragePointer; // Windows: TEB offset for TLS array (0x58 on x64, 0x58 on ARM64)
CORINFO_CONST_LOOKUP tlsRoot; // Windows: byte offset from the module TLS base to t_runtime_thread_locals (IAT_VALUE);
// Linux x64: TLSGD descriptor address
void* tlsGetAddrFtnPtr; // Linux x64: address of __tls_get_addr
size_t tlsRootOffset; // Linux ARM64: pre-computed tpidr_el0 offset to t_runtime_thread_locals (the add is skipped when 0)
};

//----------------------------------------------------------------------------
// Exception handling

Expand Down Expand Up @@ -3208,6 +3235,9 @@ class ICorStaticInfo
// Returns the primitive type for passing/returning a Wasm struct by value,
// or CORINFO_WASM_TYPE_VOID if passing/returning must be by reference.
virtual CorInfoWasmType getWasmLowering(CORINFO_CLASS_HANDLE structHnd) = 0;

// Returns information about the thread-local allocation context for inline object allocation.
// The method itself returns void; the information is handed back through pInfo, which the
// caller supplies. Check pInfo->supported before relying on any of the other fields.
virtual void getObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) = 0;
};

/*****************************************************************************
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/inc/icorjitinfoimpl_generated.h
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,9 @@ void getThreadLocalStaticBlocksInfo(
void getThreadLocalStaticInfo_NativeAOT(
CORINFO_THREAD_STATIC_INFO_NATIVEAOT* pInfo) override;

void getObjectAllocContextInfo(
CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) override;

bool isFieldStatic(
CORINFO_FIELD_HANDLE fldHnd) override;

Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@

#include <minipal/guid.h>

constexpr GUID JITEEVersionIdentifier = { /* 9383dd79-4927-4fee-a314-84cff6e87501 */
0x9383dd79,
0x4927,
0x4fee,
{0xa3, 0x14, 0x84, 0xcf, 0xf6, 0xe8, 0x75, 0x01}
constexpr GUID JITEEVersionIdentifier = { /* 7b2c0eb5-6677-4c72-bbf3-f9d32c55a6b7 */
0x7b2c0eb5,
0x6677,
0x4c72,
{0xbb, 0xf3, 0xf9, 0xd3, 0x2c, 0x55, 0xa6, 0xb7}
};

#endif // JIT_EE_VERSIONING_GUID_H
1 change: 1 addition & 0 deletions src/coreclr/jit/ICorJitInfo_names_generated.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ DEF_CLR_API(getFieldInfo)
DEF_CLR_API(getThreadLocalFieldInfo)
DEF_CLR_API(getThreadLocalStaticBlocksInfo)
DEF_CLR_API(getThreadLocalStaticInfo_NativeAOT)
DEF_CLR_API(getObjectAllocContextInfo)
DEF_CLR_API(isFieldStatic)
DEF_CLR_API(getArrayOrStringLength)
DEF_CLR_API(getBoundaries)
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,14 @@ void WrapICorJitInfo::getThreadLocalStaticInfo_NativeAOT(
API_LEAVE(getThreadLocalStaticInfo_NativeAOT);
}

// Wrapper for getObjectAllocContextInfo: forwards the request unchanged to the wrapped
// interface (wrapHnd), bracketed by the API_ENTER/API_LEAVE macros for this API name.
// NOTE(review): the macro definitions are not visible here; presumably they provide
// per-API instrumentation, like the other wrappers in this file - confirm before relying on it.
void WrapICorJitInfo::getObjectAllocContextInfo(
CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo)
{
API_ENTER(getObjectAllocContextInfo);
// pInfo is passed straight through; the wrapped implementation fills it in.
wrapHnd->getObjectAllocContextInfo(pInfo);
API_LEAVE(getObjectAllocContextInfo);
}

bool WrapICorJitInfo::isFieldStatic(
CORINFO_FIELD_HANDLE fldHnd)
{
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,10 @@ class CodeGen final : public CodeGenInterface
void genCodeForStoreLclFld(GenTreeLclFld* tree);
void genCodeForStoreLclVar(GenTreeLclVar* tree);
void genCodeForReturnTrap(GenTreeOp* tree);
void genCodeForAllocObj(GenTreeAllocObj* tree);
#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
void genInlineAllocCall(GenTreeCall* call);
#endif
void genCodeForStoreInd(GenTreeStoreInd* tree);
void genCodeForSwap(GenTreeOp* tree);
void genCodeForCpObj(GenTreeBlk* cpObjNode);
Expand Down
142 changes: 142 additions & 0 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3050,11 +3050,153 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode)
}
}

//------------------------------------------------------------------------
// genInlineAllocCall: Expand a CORINFO_HELP_NEWSFAST call inline with a
//    bump-pointer fast path and a slow-path fallback to the helper.
//
// Arguments:
//    call - the allocation helper call marked for inline expansion
//           (GTF_CALL_M_EXPAND_INLINE_ALLOC); its single argument is the MethodTable pointer.
//
// Notes:
//    On ARM64, the allocation sequence is:
//      1. TLS access to get the ee_alloc_context
//         - Windows: x18 (TEB) + TLS array + index + offset
//         - Linux:   mrs xN, tpidr_el0 + pre-computed offset
//      2. Bump-pointer allocation (non-GC-interruptible)
//      3. If the allocation doesn't fit: branch to the slow path, which calls the normal helper
//
//    Register usage (all caller-saved; the sequence stands in for a call):
//      x0       - MethodTable on entry; scratch during the size check; the new object on exit
//      x1       - copy of the MethodTable while x0 is used as scratch
//      x2       - alloc_ptr (becomes the address of the new object)
//      x16/x17  - ee_alloc_context address / temporary (base size, new alloc_ptr, immediates)
//
//    The result is always produced in REG_INTRET on both paths and only then moved to the
//    register assigned to the call node. Building it directly in the call's register would
//    corrupt the sequence if that register happened to be one of the scratch registers above.
//
#ifdef TARGET_ARM64
void CodeGen::genInlineAllocCall(GenTreeCall* call)
{
    const CORINFO_OBJECT_ALLOC_CONTEXT_INFO* allocInfo = m_compiler->compGetAllocContextInfo();
    assert(allocInfo->supported);

    genCallPlaceRegArgs(call);

    const regNumber mtReg     = REG_ARG_0;          // x0: MethodTable argument
    const regNumber resultReg = REG_INTRET;         // x0: where both paths leave the new object
    const regNumber targetReg = call->GetRegNum();  // register the rest of codegen expects the value in

    // Use IP0/IP1 (x16/x17) as scratch - they are caller-saved and not arg regs.
    const regNumber allocCtxReg = REG_IP0; // x16
    const regNumber tmpReg      = REG_IP1; // x17

    // Since this replaces a call, all caller-saved registers except mtReg (x0) are free.
    // We use x1 to save mtReg, and x2 to hold alloc_ptr during the bump allocation.
    const regNumber savedMtReg      = REG_R1;
    const regNumber allocPtrScratch = REG_R2;

    emitter* emit = GetEmitter();

    // ---- TLS access: get pointer to ee_alloc_context ----
    if (TargetOS::IsWindows)
    {
        // Windows ARM64: x18 holds TEB
        //   ldr allocCtxReg, [x18, #offsetOfTLS]        // TEB -> TLS array
        //   mov tmpReg, #_tls_index
        //   ldr allocCtxReg, [allocCtxReg, tmpReg, lsl #3]
        //   add allocCtxReg, allocCtxReg, #tlsRoot
        emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, allocCtxReg, REG_R18,
                            (int)allocInfo->offsetOfThreadLocalStoragePointer);

        // NOTE(review): tlsIndex.addr is used here as the index value itself (no load through it).
        // The struct comment describes it as the address of _tls_index - confirm the EE contract.
        assert(allocInfo->tlsIndex.accessType == IAT_VALUE);
        instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, (ssize_t)allocInfo->tlsIndex.addr);

        emit->emitIns_R_R_R_Ext(INS_ldr, EA_PTRSIZE, allocCtxReg, allocCtxReg, tmpReg, INS_OPTS_LSL, 3);

        assert(allocInfo->tlsRoot.accessType == IAT_VALUE);
        const ssize_t tlsRootVal = (ssize_t)allocInfo->tlsRoot.addr;
        instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, tlsRootVal);
        emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, allocCtxReg, allocCtxReg, tmpReg);
    }
    else
    {
        // Linux ARM64: mrs xN, tpidr_el0 + pre-computed offset. No function call needed!
        emit->emitIns_R(INS_mrs_tpid0, EA_PTRSIZE, allocCtxReg);
        if (allocInfo->tlsRootOffset != 0)
        {
            instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, (ssize_t)allocInfo->tlsRootOffset);
            emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, allocCtxReg, allocCtxReg, tmpReg);
        }
    }

    BasicBlock* slowPath = genCreateTempLabel();

    // ---- Bump allocation (non-GC-interruptible) ----
    // Between updating alloc_ptr and publishing the result, the new object is only referenced
    // from untracked scratch registers, so the sequence must not be interrupted for a GC.
    emit->emitDisableGC();

    // Save mtReg so we can reuse x0 as a scratch register.
    emit->emitIns_Mov(INS_mov, EA_PTRSIZE, savedMtReg, mtReg, /* canSkip */ false);

    // Load m_BaseSize (32-bit, zero-extended) from the MethodTable
    emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, mtReg, (int)allocInfo->methodTableBaseSizeOffset);

    // Load alloc_ptr and combined_limit
    emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, allocPtrScratch, allocCtxReg, (int)allocInfo->allocPtrFieldOffset);
    emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, resultReg, allocCtxReg, (int)allocInfo->combinedLimitFieldOffset);

    // available = combined_limit - alloc_ptr; if (baseSize > available) goto slowPath
    // (unsigned comparison: EJ_hi)
    emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, resultReg, resultReg, allocPtrScratch);
    emit->emitIns_R_R(INS_cmp, EA_PTRSIZE, tmpReg, resultReg);
    inst_JMP(EJ_hi, slowPath);

    // Fast path: allocation fits.
    // new_alloc_ptr = alloc_ptr + baseSize
    emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, allocPtrScratch, tmpReg);

    // Store MethodTable pointer at offset 0 of the new object
    emit->emitIns_R_R_I(INS_str, EA_PTRSIZE, savedMtReg, allocPtrScratch, 0);

    // Update alloc_ptr in the ee_alloc_context
    emit->emitIns_R_R_I(INS_str, EA_PTRSIZE, tmpReg, allocCtxReg, (int)allocInfo->allocPtrFieldOffset);

    // Result = old alloc_ptr (the new object). Emit the move as a GC ref so the emitter tracks
    // the object from here on, in particular across the interruptible branch to 'done'.
    emit->emitIns_Mov(INS_mov, EA_GCREF, resultReg, allocPtrScratch, /* canSkip */ false);

    emit->emitEnableGC();

    BasicBlock* done = genCreateTempLabel();
    inst_JMP(EJ_jmp, done);

    // ---- Slow path ----
    // Defined while resultReg is NOT marked as a GC ref: on this path x0 holds the
    // (combined_limit - alloc_ptr) scratch value, not an object.
    genDefineTempLabel(slowPath);

    // Restore mtReg for the helper call
    emit->emitIns_Mov(INS_mov, EA_PTRSIZE, mtReg, savedMtReg, /* canSkip */ false);

    // The helper returns the new object in REG_INTRET; report the return value as a GC ref.
    // NOTE(review): the helper is hard-coded; if other allocation helpers can be flagged for
    // inline expansion, the slow path should call the helper recorded on 'call' instead.
    genEmitHelperCall(CORINFO_HELP_NEWSFAST, 0, EA_GCREF);

    // ---- Done ----
    // Mark the result live BEFORE defining the join label so that the label's recorded GC
    // state includes the new object on both incoming paths.
    gcInfo.gcMarkRegPtrVal(resultReg, TYP_REF);
    genDefineTempLabel(done);

    if (targetReg != resultReg)
    {
        inst_Mov(TYP_REF, targetReg, resultReg, /* canSkip */ false);
        gcInfo.gcMarkRegPtrVal(targetReg, TYP_REF);
        gcInfo.gcMarkRegSetNpt(genRegMask(resultReg));
    }

    genProduceReg(call);
}
#endif // TARGET_ARM64

//------------------------------------------------------------------------
// genCall: Produce code for a GT_CALL node
//
void CodeGen::genCall(GenTreeCall* call)
{
#ifdef TARGET_ARM64
// Check if this is an allocation helper call marked for inline expansion
if ((call->gtCallMoreFlags & GTF_CALL_M_EXPAND_INLINE_ALLOC) != 0)
{
genInlineAllocCall(call);
return;
}
#endif

genCallPlaceRegArgs(call);

// Insert a null check on "this" pointer if asked.
Expand Down
Loading
Loading