diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 61ac918ffda3f3..6cee88067477e8 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1696,6 +1696,33 @@ struct CORINFO_THREAD_STATIC_INFO_NATIVEAOT CORINFO_CONST_LOOKUP tlsGetAddrFtnPtr; }; +//---------------------------------------------------------------------------- +// getObjectAllocContextInfo and CORINFO_OBJECT_ALLOC_CONTEXT_INFO: The EE instructs the JIT +// about how to access the thread-local allocation context for inline object allocation. + +struct CORINFO_OBJECT_ALLOC_CONTEXT_INFO +{ + // Whether inline allocation is supported for this runtime configuration. + // False when: GCStress enabled, allocation tracking/sampling active, + // non-thread-local allocation contexts, etc. + bool supported; + + // Offsets within the ee_alloc_context structure + uint32_t allocPtrFieldOffset; // Offset of alloc_ptr + uint32_t combinedLimitFieldOffset; // Offset of combined_limit + + // MethodTable layout offset + uint32_t methodTableBaseSizeOffset; // Offset of m_BaseSize in MethodTable + + // TLS access info (platform-specific) + CORINFO_CONST_LOOKUP tlsIndex; // Windows: address of _tls_index (IAT_VALUE) + uint32_t offsetOfThreadLocalStoragePointer; // Windows: TEB offset for TLS array (0x58 on x64, 0x58 on ARM64) + CORINFO_CONST_LOOKUP tlsRoot; // Windows: byte offset from the module TLS base to t_runtime_thread_locals (IAT_VALUE); + // Linux x64: TLSGD descriptor address + void* tlsGetAddrFtnPtr; // Linux x64: address of __tls_get_addr + size_t tlsRootOffset; // Linux ARM64: pre-computed tpidr_el0 offset to t_runtime_thread_locals +}; + //---------------------------------------------------------------------------- // Exception handling @@ -3208,6 +3235,9 @@ class ICorStaticInfo // Returns the primitive type for passing/returning a Wasm struct by value, // or CORINFO_WASM_TYPE_VOID if passing/returning must be by reference. 
virtual CorInfoWasmType getWasmLowering(CORINFO_CLASS_HANDLE structHnd) = 0; + + // Returns information about the thread-local allocation context for inline object allocation. + virtual void getObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) = 0; }; /***************************************************************************** diff --git a/src/coreclr/inc/icorjitinfoimpl_generated.h b/src/coreclr/inc/icorjitinfoimpl_generated.h index 7a67f9ea0ff3cd..3d060a9fa00e75 100644 --- a/src/coreclr/inc/icorjitinfoimpl_generated.h +++ b/src/coreclr/inc/icorjitinfoimpl_generated.h @@ -422,6 +422,9 @@ void getThreadLocalStaticBlocksInfo( void getThreadLocalStaticInfo_NativeAOT( CORINFO_THREAD_STATIC_INFO_NATIVEAOT* pInfo) override; +void getObjectAllocContextInfo( + CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) override; + bool isFieldStatic( CORINFO_FIELD_HANDLE fldHnd) override; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 4a498d88caed12..7406edecc6dbf6 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* 9383dd79-4927-4fee-a314-84cff6e87501 */ - 0x9383dd79, - 0x4927, - 0x4fee, - {0xa3, 0x14, 0x84, 0xcf, 0xf6, 0xe8, 0x75, 0x01} +constexpr GUID JITEEVersionIdentifier = { /* 7b2c0eb5-6677-4c72-bbf3-f9d32c55a6b7 */ + 0x7b2c0eb5, + 0x6677, + 0x4c72, + {0xbb, 0xf3, 0xf9, 0xd3, 0x2c, 0x55, 0xa6, 0xb7} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/jit/ICorJitInfo_names_generated.h b/src/coreclr/jit/ICorJitInfo_names_generated.h index d03d03b1007970..d62bae363b36d0 100644 --- a/src/coreclr/jit/ICorJitInfo_names_generated.h +++ b/src/coreclr/jit/ICorJitInfo_names_generated.h @@ -106,6 +106,7 @@ DEF_CLR_API(getFieldInfo) DEF_CLR_API(getThreadLocalFieldInfo) DEF_CLR_API(getThreadLocalStaticBlocksInfo) DEF_CLR_API(getThreadLocalStaticInfo_NativeAOT) +DEF_CLR_API(getObjectAllocContextInfo) 
DEF_CLR_API(isFieldStatic) DEF_CLR_API(getArrayOrStringLength) DEF_CLR_API(getBoundaries) diff --git a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp index 80411912d6c9cc..4fea0149300cdc 100644 --- a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp +++ b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp @@ -1005,6 +1005,14 @@ void WrapICorJitInfo::getThreadLocalStaticInfo_NativeAOT( API_LEAVE(getThreadLocalStaticInfo_NativeAOT); } +void WrapICorJitInfo::getObjectAllocContextInfo( + CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) +{ + API_ENTER(getObjectAllocContextInfo); + wrapHnd->getObjectAllocContextInfo(pInfo); + API_LEAVE(getObjectAllocContextInfo); +} + bool WrapICorJitInfo::isFieldStatic( CORINFO_FIELD_HANDLE fldHnd) { diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 6b2ca6af8484d4..a3b98f199e0c4e 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1151,6 +1151,10 @@ class CodeGen final : public CodeGenInterface void genCodeForStoreLclFld(GenTreeLclFld* tree); void genCodeForStoreLclVar(GenTreeLclVar* tree); void genCodeForReturnTrap(GenTreeOp* tree); + void genCodeForAllocObj(GenTreeAllocObj* tree); +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) + void genInlineAllocCall(GenTreeCall* call); +#endif void genCodeForStoreInd(GenTreeStoreInd* tree); void genCodeForSwap(GenTreeOp* tree); void genCodeForCpObj(GenTreeBlk* cpObjNode); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index fcef600921e254..4293d3c8d50e6e 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3050,11 +3050,153 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) } } +//------------------------------------------------------------------------ +// genInlineAllocCall: Expand a CORINFO_HELP_NEWSFAST call inline with a +// bump-pointer fast path and a slow-path fallback to the helper. 
+// +// On ARM64, the allocation sequence is: +// 1. TLS access to get the ee_alloc_context +// - Windows: x18 (TEB) + TLS array + index + offset +// - Linux: mrs xN, tpidr_el0 + pre-computed offset +// 2. Bump-pointer allocation (non-GC-interruptible) +// 3. If allocation doesn't fit: fall through to the normal helper call +// +#ifdef TARGET_ARM64 +void CodeGen::genInlineAllocCall(GenTreeCall* call) +{ + const CORINFO_OBJECT_ALLOC_CONTEXT_INFO* allocInfo = m_compiler->compGetAllocContextInfo(); + assert(allocInfo->supported); + + genCallPlaceRegArgs(call); + + regNumber dstReg = call->GetRegNum(); + regNumber mtReg = REG_ARG_0; // x0 + + // Use IP0/IP1 (x16/x17) as scratch — they are caller-saved and not arg regs. + regNumber allocCtxReg = REG_IP0; // x16 + regNumber tmpReg = REG_IP1; // x17 + + // Since this replaces a call, all caller-saved registers except mtReg (x0) are free. + // We use x1 to save mtReg, and x2 to hold alloc_ptr during the bump allocation. + regNumber savedMtReg = REG_R1; + regNumber allocPtrScratch = REG_R2; + + emitter* emit = GetEmitter(); + + // ---- TLS access: get pointer to ee_alloc_context ---- + if (TargetOS::IsWindows) + { + // Windows ARM64: x18 holds TEB + // ldr allocCtxReg, [x18, #offsetOfTLS] // TEB -> TLS array + // mov tmpReg, #_tls_index + // ldr allocCtxReg, [allocCtxReg, tmpReg, lsl #3] + // add allocCtxReg, allocCtxReg, #tlsRoot + emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, allocCtxReg, REG_R18, + (int)allocInfo->offsetOfThreadLocalStoragePointer); + + assert(allocInfo->tlsIndex.accessType == IAT_VALUE); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, (ssize_t)allocInfo->tlsIndex.addr); + + emit->emitIns_R_R_R_Ext(INS_ldr, EA_PTRSIZE, allocCtxReg, allocCtxReg, tmpReg, INS_OPTS_LSL, 3); + + assert(allocInfo->tlsRoot.accessType == IAT_VALUE); + ssize_t tlsRootVal = (ssize_t)allocInfo->tlsRoot.addr; + instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, tlsRootVal); + emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, allocCtxReg, allocCtxReg, 
tmpReg); + } + else + { + // Linux ARM64: mrs xN, tpidr_el0 + pre-computed offset. No function call needed! + emit->emitIns_R(INS_mrs_tpid0, EA_PTRSIZE, allocCtxReg); + if (allocInfo->tlsRootOffset != 0) + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, (ssize_t)allocInfo->tlsRootOffset); + emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, allocCtxReg, allocCtxReg, tmpReg); + } + } + + BasicBlock* slowPath = genCreateTempLabel(); + + // ---- Bump allocation (non-GC-interruptible) ---- + emit->emitDisableGC(); + + // Save mtReg so we can reuse x0 as a scratch register. + emit->emitIns_Mov(INS_mov, EA_PTRSIZE, savedMtReg, mtReg, /* canSkip */ false); + + // Load m_BaseSize (32-bit) from the MethodTable + emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, mtReg, (int)allocInfo->methodTableBaseSizeOffset); + + // Load alloc_ptr and combined_limit + emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, allocPtrScratch, allocCtxReg, (int)allocInfo->allocPtrFieldOffset); + emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, dstReg, allocCtxReg, (int)allocInfo->combinedLimitFieldOffset); + + // available = combined_limit - alloc_ptr; if (baseSize > available) goto slowPath + emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, dstReg, dstReg, allocPtrScratch); + emit->emitIns_R_R(INS_cmp, EA_PTRSIZE, tmpReg, dstReg); + inst_JMP(EJ_hi, slowPath); + + // Fast path: allocation fits. 
+ // new_alloc_ptr = alloc_ptr + baseSize + emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, allocPtrScratch, tmpReg); + + // Store MethodTable pointer at offset 0 of the new object + emit->emitIns_R_R_I(INS_str, EA_PTRSIZE, savedMtReg, allocPtrScratch, 0); + + // Update alloc_ptr in the ee_alloc_context + emit->emitIns_R_R_I(INS_str, EA_PTRSIZE, tmpReg, allocCtxReg, (int)allocInfo->allocPtrFieldOffset); + + // Result = alloc_ptr (the new object) + emit->emitIns_Mov(INS_mov, EA_PTRSIZE, dstReg, allocPtrScratch, /* canSkip */ true); + + emit->emitEnableGC(); + + BasicBlock* done = genCreateTempLabel(); + inst_JMP(EJ_jmp, done); + + // ---- Slow path ---- + genDefineTempLabel(slowPath); + + // Restore mtReg for the helper call + emit->emitIns_Mov(INS_mov, EA_PTRSIZE, mtReg, savedMtReg, /* canSkip */ false); + + genEmitHelperCall(CORINFO_HELP_NEWSFAST, 0, EA_PTRSIZE); + + // Helper returns the new object in x0. + if (dstReg != REG_INTRET) + { + inst_Mov(TYP_REF, dstReg, REG_INTRET, /* canSkip */ false); + } + + // ---- Done ---- + genDefineTempLabel(done); + + gcInfo.gcMarkRegPtrVal(dstReg, TYP_REF); + + if (call->GetRegNum() != dstReg) + { + inst_Mov(TYP_REF, call->GetRegNum(), dstReg, /* canSkip */ false); + gcInfo.gcMarkRegPtrVal(call->GetRegNum(), TYP_REF); + gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); + } + + genProduceReg(call); +} +#endif // TARGET_ARM64 + //------------------------------------------------------------------------ // genCall: Produce code for a GT_CALL node // void CodeGen::genCall(GenTreeCall* call) { +#ifdef TARGET_ARM64 + // Check if this is an allocation helper call marked for inline expansion + if ((call->gtCallMoreFlags & GTF_CALL_M_EXPAND_INLINE_ALLOC) != 0) + { + genInlineAllocCall(call); + return; + } +#endif + genCallPlaceRegArgs(call); // Insert a null check on "this" pointer if asked. 
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 36a120250c6cf3..55a37cef51cbd2 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1726,6 +1726,142 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) genDefineTempLabel(skipLabel); } +//------------------------------------------------------------------------ +// genCodeForAllocObj: Generate code for GT_ALLOCOBJ - inline object allocation. +// GT_ALLOCOBJ should have been morphed to a helper call. This codepath should never be reached. +// +void CodeGen::genCodeForAllocObj(GenTreeAllocObj* tree) +{ + unreached(); +} + +//------------------------------------------------------------------------ +// genInlineAllocCall: Expand a CORINFO_HELP_NEWSFAST call inline with a +// bump-pointer fast path and a slow-path fallback to the helper. +// +// The call node has been processed by genCallPlaceRegArgs already, so the +// MethodTable argument is in REG_ARG_0 (rcx on Windows). This function +// replaces the call emission with: +// 1. TLS access to get the ee_alloc_context +// 2. Bump-pointer allocation (non-GC-interruptible) +// 3. 
If allocation doesn't fit: fall through to the normal helper call +// +#ifdef TARGET_AMD64 +void CodeGen::genInlineAllocCall(GenTreeCall* call) +{ + const CORINFO_OBJECT_ALLOC_CONTEXT_INFO* allocInfo = m_compiler->compGetAllocContextInfo(); + assert(allocInfo->supported); + + genCallPlaceRegArgs(call); + + regNumber dstReg = call->GetRegNum(); + regNumber mtReg = REG_ARG_0; + + regNumber allocCtxReg = REG_R10; + regNumber tmpReg = REG_R11; + + emitter* emit = GetEmitter(); + + // ---- TLS access: get pointer to ee_alloc_context ---- + if (TargetOS::IsWindows) + { + emit->emitIns_R_C(INS_mov, EA_PTRSIZE, allocCtxReg, FLD_GLOBAL_GS, + (int)allocInfo->offsetOfThreadLocalStoragePointer); + + assert(allocInfo->tlsIndex.accessType == IAT_VALUE); + instGen_Set_Reg_To_Imm(EA_4BYTE, tmpReg, (ssize_t)allocInfo->tlsIndex.addr); + + emit->emitIns_R_ARX(INS_mov, EA_PTRSIZE, allocCtxReg, allocCtxReg, tmpReg, 8, 0); + + assert(allocInfo->tlsRoot.accessType == IAT_VALUE); + emit->emitIns_R_AR(INS_lea, EA_PTRSIZE, allocCtxReg, allocCtxReg, (int)(ssize_t)allocInfo->tlsRoot.addr); + } + else + { + // Linux x64: call __tls_get_addr. Save arg register on the stack. + // Push an even number of 8-byte values for 16-byte stack alignment. 
+ emit->emitIns_R(INS_push, EA_PTRSIZE, mtReg); + emit->emitIns_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, 8); + + assert(allocInfo->tlsRoot.accessType == IAT_VALUE); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_ARG_0, (ssize_t)allocInfo->tlsRoot.addr); + + assert(allocInfo->tlsGetAddrFtnPtr != nullptr); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R11, (ssize_t)allocInfo->tlsGetAddrFtnPtr); + { + EmitCallParams callParams; + callParams.callType = EC_INDIR_R; + callParams.ireg = REG_R11; + callParams.noSafePoint = true; + genEmitCallWithCurrentGC(callParams); + } + + emit->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_R10, REG_RAX, /* canSkip */ false); + allocCtxReg = REG_R10; + + emit->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, 8); + emit->emitIns_R(INS_pop, EA_PTRSIZE, mtReg); + } + + BasicBlock* slowPath = genCreateTempLabel(); + + // ---- Bump allocation (non-GC-interruptible) ---- + GetEmitter()->emitDisableGC(); + + // Load m_BaseSize from the MethodTable + emit->emitIns_R_AR(INS_mov, EA_4BYTE, tmpReg, mtReg, (int)allocInfo->methodTableBaseSizeOffset); + + // Use subtraction-based comparison (matches the runtime helper) to avoid + // alloc_ptr + size overflow: available = combined_limit - alloc_ptr; + // if (size > available) goto slowPath; + emit->emitIns_R_AR(INS_mov, EA_PTRSIZE, dstReg, allocCtxReg, (int)allocInfo->combinedLimitFieldOffset); + emit->emitIns_R_AR(INS_sub, EA_PTRSIZE, dstReg, allocCtxReg, (int)allocInfo->allocPtrFieldOffset); + emit->emitIns_R_R(INS_cmp, EA_PTRSIZE, tmpReg, dstReg); + inst_JMP(EJ_ja, slowPath); + + // Allocation fits. dstReg = alloc_ptr (the new object). 
+ emit->emitIns_R_AR(INS_mov, EA_PTRSIZE, dstReg, allocCtxReg, (int)allocInfo->allocPtrFieldOffset); + + // Compute and store new alloc_ptr = alloc_ptr + size + emit->emitIns_R_R(INS_add, EA_PTRSIZE, tmpReg, dstReg); + emit->emitIns_AR_R(INS_mov, EA_PTRSIZE, tmpReg, allocCtxReg, (int)allocInfo->allocPtrFieldOffset); + + // Set MethodTable pointer on the new object (always at offset 0) + emit->emitIns_AR_R(INS_mov, EA_PTRSIZE, mtReg, dstReg, 0); + + GetEmitter()->emitEnableGC(); + + BasicBlock* done = genCreateTempLabel(); + inst_JMP(EJ_jmp, done); + + // ---- Slow path ---- + genDefineTempLabel(slowPath); + + genEmitHelperCall(CORINFO_HELP_NEWSFAST, 0, EA_PTRSIZE); + + // Helper returns the new object in rax. + if (dstReg != REG_INTRET) + { + inst_Mov(TYP_REF, dstReg, REG_INTRET, /* canSkip */ false); + } + + // ---- Done ---- + genDefineTempLabel(done); + + gcInfo.gcMarkRegPtrVal(dstReg, TYP_REF); + + // Move result to the call's destination register if different + if (call->GetRegNum() != dstReg) + { + inst_Mov(TYP_REF, call->GetRegNum(), dstReg, /* canSkip */ false); + gcInfo.gcMarkRegPtrVal(call->GetRegNum(), TYP_REF); + gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); + } + + genProduceReg(call); +} +#endif // TARGET_AMD64 + /***************************************************************************** * * Generate code for a single node in the tree. 
@@ -1807,6 +1943,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genLclHeap(treeNode); break; + case GT_ALLOCOBJ: + genCodeForAllocObj(treeNode->AsAllocObj()); + break; + case GT_CNS_INT: #ifdef TARGET_X86 assert(!treeNode->IsIconHandle(GTF_ICON_TLS_HDL)); @@ -5874,6 +6014,21 @@ bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarri // Produce code for a GT_CALL node void CodeGen::genCall(GenTreeCall* call) { +#ifdef TARGET_AMD64 + // Check if this is an allocation helper call marked for inline expansion + if ((call->gtCallMoreFlags & GTF_CALL_M_EXPAND_INLINE_ALLOC) != 0) + { + // Handle AVX/SSE transition before the slow-path helper call + if (GetEmitter()->Contains256bitOrMoreAVX() && call->NeedsVzeroupper(m_compiler)) + { + instGen(INS_vzeroupper); + } + + genInlineAllocCall(call); + return; + } +#endif + genAlignStackBeforeCall(call); // all virtuals should have been expanded into a control expression diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index cb1137a8b4d0c5..05c591426a43d2 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10297,6 +10297,10 @@ class Compiler bool compMaskConvertUsed = false; // Does the method have Convert Mask To Vector nodes. bool compUsesThrowHelper = false; // There is a call to a THROW_HELPER for the compiled method. + // Cached allocation context info for inline object allocation + CORINFO_OBJECT_ALLOC_CONTEXT_INFO compAllocContextInfo; + bool compAllocContextInfoInitialized = false; + // NOTE: These values are only reliable after // the importing is completely finished. 
@@ -11369,6 +11373,16 @@ class Compiler CORINFO_CONST_LOOKUP compGetHelperFtn(CorInfoHelpFunc ftnNum); + const CORINFO_OBJECT_ALLOC_CONTEXT_INFO* compGetAllocContextInfo() + { + if (!compAllocContextInfoInitialized) + { + info.compCompHnd->getObjectAllocContextInfo(&compAllocContextInfo); + compAllocContextInfoInitialized = true; + } + return &compAllocContextInfo; + } + // Several JIT/EE interface functions return a CorInfoType, and also return a // class handle as an out parameter if the type is a value class. Returns the // size of the type these describe. diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 3dfca0ba725a0a..9f60c5799c25eb 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -2846,6 +2846,14 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK) case GT_ARR_ADDR: break; + case GT_ALLOCOBJ: + if (op1->AsAllocObj()->gtNewHelper != op2->AsAllocObj()->gtNewHelper || + op1->AsAllocObj()->gtAllocObjClsHnd != op2->AsAllocObj()->gtAllocObjClsHnd) + { + return false; + } + break; + default: assert(!"unexpected unary ExOp operator"); } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ed72dd9f8c87a5..db1503864150b9 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4391,6 +4391,7 @@ enum GenTreeCallFlags : unsigned int GTF_CALL_M_CAST_OBJ_NONNULL = 0x04000000, // if we expand this specific cast we don't need to check the input object for null // NOTE: if needed, this flag can be removed, and we can introduce new _NONNUL cast helpers GTF_CALL_M_STACK_ARRAY = 0x08000000, // this call is a new array helper for a stack allocated array. 
+ GTF_CALL_M_EXPAND_INLINE_ALLOC = 0x10000000, // this allocation helper call should be expanded inline in codegen }; inline constexpr GenTreeCallFlags operator ~(GenTreeCallFlags a) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 7741e0f6b8fe19..82c784edded278 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -683,6 +683,7 @@ RELEASE_CONFIG_INTEGER(JitObjectStackAllocationConditionalEscape, "JitObjectStac CONFIG_STRING(JitObjectStackAllocationConditionalEscapeRange, "JitObjectStackAllocationConditionalEscapeRange") RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, "JitObjectStackAllocationArray", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationSize, "JitObjectStackAllocationSize", 528) +RELEASE_CONFIG_INTEGER(JitInlineAllocFast, "JitInlineAllocFast", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationTrackFields, "JitObjectStackAllocationTrackFields", 1) CONFIG_STRING(JitObjectStackAllocationTrackFieldsRange, "JitObjectStackAllocationTrackFieldsRange") CONFIG_INTEGER(JitObjectStackAllocationDumpConnGraph, "JitObjectStackAllocationDumpConnGraph", 0) diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index 894ed2de75b952..79c199b56c3403 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -1337,11 +1337,29 @@ void ObjectAllocator::MorphAllocObjNode(AllocationCandidate& candidate) candidate.m_onHeapReason); if ((candidate.m_allocType == OAT_NEWOBJ) || (candidate.m_allocType == OAT_NEWOBJ_HEAP)) { - GenTree* const stmtExpr = candidate.m_tree; - GenTree* const oldData = stmtExpr->AsLclVar()->Data(); - GenTree* const newData = MorphAllocObjNodeIntoHelperCall(oldData->AsAllocObj()); - stmtExpr->AsLclVar()->Data() = newData; - stmtExpr->AddAllEffectsFlags(newData); + GenTree* const stmtExpr = candidate.m_tree; + GenTreeAllocObj* allocObj = stmtExpr->AsLclVar()->Data()->AsAllocObj(); + +#if defined(TARGET_AMD64) || 
defined(TARGET_ARM64) + // Check if we can expand the allocation inline in codegen. + const CORINFO_OBJECT_ALLOC_CONTEXT_INFO* allocCtxInfo = m_compiler->compGetAllocContextInfo(); + if (allocObj->gtNewHelper == CORINFO_HELP_NEWSFAST && !allocObj->gtHelperHasSideEffects && + allocCtxInfo->supported && m_compiler->opts.OptimizationEnabled() && + JitConfig.JitInlineAllocFast() != 0) + { + JITDUMP("Marking allocation [%06u] for inline expansion\n", m_compiler->dspTreeID(allocObj)); + GenTree* const newData = MorphAllocObjNodeIntoHelperCall(allocObj); + newData->AsCall()->gtCallMoreFlags |= GTF_CALL_M_EXPAND_INLINE_ALLOC; + stmtExpr->AsLclVar()->Data() = newData; + stmtExpr->AddAllEffectsFlags(newData); + } + else +#endif // TARGET_AMD64 || TARGET_ARM64 + { + GenTree* const newData = MorphAllocObjNodeIntoHelperCall(allocObj); + stmtExpr->AsLclVar()->Data() = newData; + stmtExpr->AddAllEffectsFlags(newData); + } } if (IsTrackedLocal(lclNum)) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index 2178c5fd0827e4..d7c2ca5de8f257 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -3712,6 +3712,12 @@ private CorInfoWasmType getWasmLowering(CORINFO_CLASS_STRUCT_* structHnd) private uint getThreadTLSIndex(ref void* ppIndirection) { throw new NotImplementedException("getThreadTLSIndex"); } + private void getObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) + { + // NativeAOT/crossgen2: not yet implemented + *pInfo = default; + } + private Dictionary _helperCache = new Dictionary(); private void getHelperFtn(CorInfoHelpFunc ftnNum, CORINFO_CONST_LOOKUP *pNativeEntrypoint, CORINFO_METHOD_STRUCT_** pMethod) { diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs index a1eebc3762aaa1..1f5ade80580888 100644 --- 
a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs @@ -122,6 +122,7 @@ static ICorJitInfoCallbacks() s_callbacks.getThreadLocalFieldInfo = &_getThreadLocalFieldInfo; s_callbacks.getThreadLocalStaticBlocksInfo = &_getThreadLocalStaticBlocksInfo; s_callbacks.getThreadLocalStaticInfo_NativeAOT = &_getThreadLocalStaticInfo_NativeAOT; + s_callbacks.getObjectAllocContextInfo = &_getObjectAllocContextInfo; s_callbacks.isFieldStatic = &_isFieldStatic; s_callbacks.getArrayOrStringLength = &_getArrayOrStringLength; s_callbacks.getBoundaries = &_getBoundaries; @@ -304,6 +305,7 @@ static ICorJitInfoCallbacks() public delegate* unmanaged getThreadLocalFieldInfo; public delegate* unmanaged getThreadLocalStaticBlocksInfo; public delegate* unmanaged getThreadLocalStaticInfo_NativeAOT; + public delegate* unmanaged<IntPtr, IntPtr*, CORINFO_OBJECT_ALLOC_CONTEXT_INFO*, void> getObjectAllocContextInfo; public delegate* unmanaged isFieldStatic; public delegate* unmanaged getArrayOrStringLength; public delegate* unmanaged getBoundaries; @@ -1899,6 +1901,20 @@ private static void _getThreadLocalStaticInfo_NativeAOT(IntPtr thisHandle, IntPt } } + [UnmanagedCallersOnly] + private static void _getObjectAllocContextInfo(IntPtr thisHandle, IntPtr* ppException, CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) + { + var _this = GetThis(thisHandle); + try + { + _this.getObjectAllocContextInfo(pInfo); + } + catch (Exception ex) + { + *ppException = _this.AllocException(ex); + } + } + [UnmanagedCallersOnly] private static byte _isFieldStatic(IntPtr thisHandle, IntPtr* ppException, CORINFO_FIELD_STRUCT_* fldHnd) { diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 718fde04a9a65b..e44515f94b679e 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1276,6 +1276,19 @@ public unsafe struct CORINFO_THREAD_STATIC_INFO_NATIVEAOT public 
CORINFO_CONST_LOOKUP tlsGetAddrFtnPtr; }; + public unsafe struct CORINFO_OBJECT_ALLOC_CONTEXT_INFO + { + public byte supported; + public uint allocPtrFieldOffset; + public uint combinedLimitFieldOffset; + public uint methodTableBaseSizeOffset; + public CORINFO_CONST_LOOKUP tlsIndex; + public uint offsetOfThreadLocalStoragePointer; + public CORINFO_CONST_LOOKUP tlsRoot; + public nuint tlsGetAddrFtnPtr; + public nuint tlsRootOffset; + }; + // System V struct passing // The Classification types are described in the ABI spec at https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf public enum SystemVClassificationType : byte diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt index 8e605002f90174..4eeb7087d9f945 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt @@ -92,6 +92,7 @@ CORINFO_METHOD_INFO*,CORINFO_METHOD_INFO* CORINFO_FIELD_INFO*,CORINFO_FIELD_INFO* CORINFO_THREAD_STATIC_BLOCKS_INFO*,CORINFO_THREAD_STATIC_BLOCKS_INFO* CORINFO_THREAD_STATIC_INFO_NATIVEAOT*,CORINFO_THREAD_STATIC_INFO_NATIVEAOT* +CORINFO_OBJECT_ALLOC_CONTEXT_INFO*,CORINFO_OBJECT_ALLOC_CONTEXT_INFO* CORINFO_CALL_INFO*,CORINFO_CALL_INFO* CORINFO_DEVIRTUALIZATION_INFO*,CORINFO_DEVIRTUALIZATION_INFO* CORINFO_TYPE_LAYOUT_NODE*,CORINFO_TYPE_LAYOUT_NODE* @@ -275,6 +276,7 @@ FUNCTIONS uint32_t getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCtype) void getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) void getThreadLocalStaticInfo_NativeAOT(CORINFO_THREAD_STATIC_INFO_NATIVEAOT* pInfo) + void getObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) bool isFieldStatic(CORINFO_FIELD_HANDLE fldHnd) int getArrayOrStringLength(CORINFO_OBJECT_HANDLE objHnd) void getBoundaries(CORINFO_METHOD_HANDLE ftn, unsigned int* cILOffsets, 
uint32_t** pILOffsets, ICorDebugInfo::BoundaryTypes* implicitBoundaries) diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h index dec68b1eb53d4f..63eafb403db84a 100644 --- a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h +++ b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h @@ -113,6 +113,7 @@ struct JitInterfaceCallbacks uint32_t (* getThreadLocalFieldInfo)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_FIELD_HANDLE field, bool isGCtype); void (* getThreadLocalStaticBlocksInfo)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo); void (* getThreadLocalStaticInfo_NativeAOT)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_THREAD_STATIC_INFO_NATIVEAOT* pInfo); + void (* getObjectAllocContextInfo)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo); bool (* isFieldStatic)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_FIELD_HANDLE fldHnd); int (* getArrayOrStringLength)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_OBJECT_HANDLE objHnd); void (* getBoundaries)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn, unsigned int* cILOffsets, uint32_t** pILOffsets, ICorDebugInfo::BoundaryTypes* implicitBoundaries); @@ -1199,6 +1200,14 @@ class JitInterfaceWrapper : public ICorJitInfo if (pException != nullptr) throw pException; } + virtual void getObjectAllocContextInfo( + CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) +{ + CorInfoExceptionClass* pException = nullptr; + _callbacks->getObjectAllocContextInfo(_thisHandle, &pException, pInfo); + if (pException != nullptr) throw pException; +} + virtual bool isFieldStatic( CORINFO_FIELD_HANDLE fldHnd) { diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 
7dc7ca9fcf5211..bef4404cbd67e0 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -572,6 +572,19 @@ struct Agnostic_GetThreadStaticInfo_NativeAOT Agnostic_CORINFO_CONST_LOOKUP tlsGetAddrFtnPtr; }; +struct Agnostic_GetObjectAllocContextInfo +{ + DWORD supported; + DWORD allocPtrFieldOffset; + DWORD combinedLimitFieldOffset; + DWORD methodTableBaseSizeOffset; + Agnostic_CORINFO_CONST_LOOKUP tlsIndex; + DWORD offsetOfThreadLocalStoragePointer; + Agnostic_CORINFO_CONST_LOOKUP tlsRoot; + DWORDLONG tlsGetAddrFtnPtr; + DWORDLONG tlsRootOffset; +}; + struct Agnostic_GetClassCtorInitializationInfo { Agnostic_CORINFO_CONST_LOOKUP addr; diff --git a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h index b464feef60b3f6..b0edb58e552b35 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h @@ -173,6 +173,7 @@ LWM(SatisfiesMethodConstraints, DLDL, DWORD) LWM(GetUnmanagedCallConv, MethodOrSigInfoValue, DD) LWM(DoesFieldBelongToClass, DLDL, DWORD) DENSELWM(SigInstHandleMap, DWORDLONG) +LWM(GetObjectAllocContextInfo, DWORD, Agnostic_GetObjectAllocContextInfo) LWM(GetWasmTypeSymbol, Agnostic_GetWasmTypeSymbol, DWORDLONG) #undef LWM diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 93c0e44a9f8098..a2cbe403710d43 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -7811,3 +7811,61 @@ void SetDebugDumpVariables() g_debugRep = true; } } + +void MethodContext::recGetObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) +{ + if (GetObjectAllocContextInfo == nullptr) + GetObjectAllocContextInfo = new LightWeightMap<DWORD, Agnostic_GetObjectAllocContextInfo>(); + + Agnostic_GetObjectAllocContextInfo value; + ZeroMemory(&value, 
sizeof(value)); + + value.supported = pInfo->supported ? 1 : 0; + value.allocPtrFieldOffset = pInfo->allocPtrFieldOffset; + value.combinedLimitFieldOffset = pInfo->combinedLimitFieldOffset; + value.methodTableBaseSizeOffset = pInfo->methodTableBaseSizeOffset; + value.tlsIndex = SpmiRecordsHelper::StoreAgnostic_CORINFO_CONST_LOOKUP(&pInfo->tlsIndex); + value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; + value.tlsRoot = SpmiRecordsHelper::StoreAgnostic_CORINFO_CONST_LOOKUP(&pInfo->tlsRoot); + value.tlsGetAddrFtnPtr = CastPointer(pInfo->tlsGetAddrFtnPtr); + value.tlsRootOffset = (DWORDLONG)pInfo->tlsRootOffset; + + DWORD key = 0; + GetObjectAllocContextInfo->Add(key, value); + DEBUG_REC(dmpGetObjectAllocContextInfo(key, value)); +} + +void MethodContext::dmpGetObjectAllocContextInfo(DWORD key, const Agnostic_GetObjectAllocContextInfo& value) +{ + printf("GetObjectAllocContextInfo key %u, supported-%u" + ", allocPtrFieldOffset-%u, combinedLimitFieldOffset-%u" + ", methodTableBaseSizeOffset-%u" + ", tlsIndex-%s, offsetOfThreadLocalStoragePointer-%u" + ", tlsRoot-%s, tlsGetAddrFtnPtr-%016" PRIX64 + ", tlsRootOffset-%016" PRIX64, + key, value.supported, + value.allocPtrFieldOffset, value.combinedLimitFieldOffset, + value.methodTableBaseSizeOffset, + SpmiDumpHelper::DumpAgnostic_CORINFO_CONST_LOOKUP(value.tlsIndex).c_str(), + value.offsetOfThreadLocalStoragePointer, + SpmiDumpHelper::DumpAgnostic_CORINFO_CONST_LOOKUP(value.tlsRoot).c_str(), + value.tlsGetAddrFtnPtr, value.tlsRootOffset); +} + +void MethodContext::repGetObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) +{ + DWORD key = 0; + Agnostic_GetObjectAllocContextInfo value = LookupByKeyOrMiss(GetObjectAllocContextInfo, key, ": key %u", key); + + DEBUG_REP(dmpGetObjectAllocContextInfo(key, value)); + + pInfo->supported = (value.supported != 0); + pInfo->allocPtrFieldOffset = value.allocPtrFieldOffset; + pInfo->combinedLimitFieldOffset = value.combinedLimitFieldOffset; 
+ pInfo->methodTableBaseSizeOffset = value.methodTableBaseSizeOffset; + pInfo->tlsIndex = SpmiRecordsHelper::RestoreCORINFO_CONST_LOOKUP(value.tlsIndex); + pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; + pInfo->tlsRoot = SpmiRecordsHelper::RestoreCORINFO_CONST_LOOKUP(value.tlsRoot); + pInfo->tlsGetAddrFtnPtr = (void*)value.tlsGetAddrFtnPtr; + pInfo->tlsRootOffset = (size_t)value.tlsRootOffset; +} diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h index 067b2154d1e52f..bfc34c56b3a17f 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h @@ -501,6 +501,10 @@ class MethodContext void dmpGetThreadLocalStaticInfo_NativeAOT(DWORDLONG key, const Agnostic_GetThreadStaticInfo_NativeAOT& value); void repGetThreadLocalStaticInfo_NativeAOT(CORINFO_THREAD_STATIC_INFO_NATIVEAOT* pInfo); + void recGetObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo); + void dmpGetObjectAllocContextInfo(DWORD key, const Agnostic_GetObjectAllocContextInfo& value); + void repGetObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo); + void recEmbedMethodHandle(CORINFO_METHOD_HANDLE handle, void** ppIndirection, CORINFO_METHOD_HANDLE result); void dmpEmbedMethodHandle(DWORDLONG key, DLDL value); CORINFO_METHOD_HANDLE repEmbedMethodHandle(CORINFO_METHOD_HANDLE handle, void** ppIndirection); @@ -1222,6 +1226,7 @@ enum mcPackets Packet_GetWasmTypeSymbol = 235, Packet_GetWasmLowering = 236, Packet_GetAsyncOtherVariant = 237, + Packet_GetObjectAllocContextInfo = 238, }; void SetDebugDumpVariables(); diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp index e29ba7b5cc97ff..ad9dc09d510a73 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp +++ 
b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp @@ -1131,6 +1131,13 @@ void interceptor_ICJI::getThreadLocalStaticInfo_NativeAOT(CORINFO_THREAD_STATIC_ mc->recGetThreadLocalStaticInfo_NativeAOT(pInfo); } +void interceptor_ICJI::getObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) +{ + mc->cr->AddCall("getObjectAllocContextInfo"); + original_ICorJitInfo->getObjectAllocContextInfo(pInfo); + mc->recGetObjectAllocContextInfo(pInfo); +} + // Returns true iff "fldHnd" represents a static field. bool interceptor_ICJI::isFieldStatic(CORINFO_FIELD_HANDLE fldHnd) { diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp index 8058b6802159e8..26e1b79dda6430 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp @@ -821,6 +821,13 @@ void interceptor_ICJI::getThreadLocalStaticInfo_NativeAOT( original_ICorJitInfo->getThreadLocalStaticInfo_NativeAOT(pInfo); } +void interceptor_ICJI::getObjectAllocContextInfo( + CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) +{ + mcs->AddCall("getObjectAllocContextInfo"); + original_ICorJitInfo->getObjectAllocContextInfo(pInfo); +} + bool interceptor_ICJI::isFieldStatic( CORINFO_FIELD_HANDLE fldHnd) { diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp index 852a318f83c225..b0b2a30c542505 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp @@ -719,6 +719,12 @@ void interceptor_ICJI::getThreadLocalStaticInfo_NativeAOT( original_ICorJitInfo->getThreadLocalStaticInfo_NativeAOT(pInfo); } +void interceptor_ICJI::getObjectAllocContextInfo( + CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) 
+{ + original_ICorJitInfo->getObjectAllocContextInfo(pInfo); +} + bool interceptor_ICJI::isFieldStatic( CORINFO_FIELD_HANDLE fldHnd) { diff --git a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp index 20372434076341..5d54a7aefe369a 100644 --- a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp @@ -937,6 +937,12 @@ void MyICJI::getThreadLocalStaticInfo_NativeAOT(CORINFO_THREAD_STATIC_INFO_NATIV jitInstance->mc->repGetThreadLocalStaticInfo_NativeAOT(pInfo); } +void MyICJI::getObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) +{ + jitInstance->mc->cr->AddCall("getObjectAllocContextInfo"); + jitInstance->mc->repGetObjectAllocContextInfo(pInfo); +} + // Returns true iff "fldHnd" represents a static field. bool MyICJI::isFieldStatic(CORINFO_FIELD_HANDLE fldHnd) { diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 2d71f26409bc41..9562a3741653e4 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -323,6 +323,29 @@ LEAF_END GetTlsIndexObjectDescOffset, _TEXT #endif // !TARGET_ANDROID #endif // !TARGET_OSX +#ifdef TARGET_APPLE +# EXTERN_C void* GetRuntimeThreadLocalsThreadVarsAddress() +LEAF_ENTRY GetRuntimeThreadLocalsThreadVarsAddress, _TEXT + mov rdi, _t_runtime_thread_locals@TLVP[rip] + ret +LEAF_END GetRuntimeThreadLocalsThreadVarsAddress, _TEXT +#endif // TARGET_APPLE + +#ifndef TARGET_APPLE +#ifndef TARGET_ANDROID +# EXTERN_C void* GetRuntimeThreadLocalsTlsIndexObjectDescOffset(); +LEAF_ENTRY GetRuntimeThreadLocalsTlsIndexObjectDescOffset, _TEXT + .byte 0x66 + lea rdi, t_runtime_thread_locals@TLSGD[rip] + .byte 0x66 + .byte 0x66 + .byte 0x48 # rex.W prefix for padding + call EXTERNAL_C_FUNC(__tls_get_addr) + int 3 +LEAF_END GetRuntimeThreadLocalsTlsIndexObjectDescOffset, _TEXT +#endif // !TARGET_ANDROID +#endif // !TARGET_APPLE + LEAF_ENTRY JIT_PollGC, _TEXT 
PREPARE_EXTERNAL_VAR g_TrapReturningThreads, rax cmp dword ptr [rax], 0 diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 76ab00eb6700f3..9d8e9233eec7ea 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -658,6 +658,24 @@ LEAF_ENTRY GetTLSResolverAddress, _TEXT EPILOG_RETURN LEAF_END GetTLSResolverAddress, _TEXT // ------------------------------------------------------------------ + +// ------------------------------------------------------------------ +// size_t GetRuntimeThreadLocalsVariableOffset() + +// Helper to calculate the offset of native thread local variable `t_runtime_thread_locals` in TCB. +// The offset, after calculation is returned in `x0` register. + +LEAF_ENTRY GetRuntimeThreadLocalsVariableOffset, _TEXT + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + adrp x0, :tlsdesc:t_runtime_thread_locals + ldr x1, [x0, #:tlsdesc_lo12:t_runtime_thread_locals] + add x0, x0, :tlsdesc_lo12:t_runtime_thread_locals + .tlsdesccall t_runtime_thread_locals + blr x1 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + EPILOG_RETURN +LEAF_END GetRuntimeThreadLocalsVariableOffset, _TEXT +// ------------------------------------------------------------------ #endif // TARGET_ANDROID #endif // !TARGET_APPLE diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 291ee418eace51..a2c6b11a0f309d 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -1329,6 +1329,63 @@ void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* EE_TO_JIT_TRANSITION(); } +/*********************************************************************/ +void CEEInfo::getObjectAllocContextInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + MODE_PREEMPTIVE; + } CONTRACTL_END; + + JIT_TO_EE_TRANSITION(); + + memset(pInfo, 0, sizeof(*pInfo)); + + // Inline allocation is only supported when thread allocation contexts are 
used, + // and when GC stress, allocation tracking, and allocation sampling are not active. + if (!GCHeapUtilities::UseThreadAllocationContexts()) + { + pInfo->supported = false; + } +#ifdef STRESS_HEAP + else if (GCStress<cfg_alloc>::IsEnabled()) + { + pInfo->supported = false; + } +#endif + else if (CORProfilerTrackAllocations() || CORProfilerTrackAllocationsEnabled()) + { + pInfo->supported = false; + } + else if (ee_alloc_context::IsRandomizedSamplingEnabled()) + { + pInfo->supported = false; + } +#ifdef FEATURE_EVENT_TRACE + else if (ETW::TypeSystemLog::IsHeapAllocEventEnabled()) + { + pInfo->supported = false; + } +#endif + else + { + pInfo->supported = true; + + // ee_alloc_context offsets + pInfo->allocPtrFieldOffset = (uint32_t)(offsetof(ee_alloc_context, m_GCAllocContext) + offsetof(gc_alloc_context, alloc_ptr)); + pInfo->combinedLimitFieldOffset = (uint32_t)offsetof(ee_alloc_context, m_CombinedLimit); + + // MethodTable layout + pInfo->methodTableBaseSizeOffset = (uint32_t)cdac_data<MethodTable>::BaseSize; + + // TLS access info - how to reach t_runtime_thread_locals + GetObjectAllocContextTlsInfo(pInfo); + } + + EE_TO_JIT_TRANSITION(); +} + /*********************************************************************/ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, diff --git a/src/coreclr/vm/threadstatics.cpp b/src/coreclr/vm/threadstatics.cpp index 23d8efd5be9fc2..47911ee254a4f3 100644 --- a/src/coreclr/vm/threadstatics.cpp +++ b/src/coreclr/vm/threadstatics.cpp @@ -1068,6 +1068,7 @@ static uint32_t ThreadLocalOffset(void* p) } #elif defined(TARGET_APPLE) extern "C" void* GetThreadVarsAddress(); +extern "C" void* GetRuntimeThreadLocalsThreadVarsAddress(); static void* GetThreadVarsSectionAddressFromDesc(uint8_t* p) { @@ -1108,6 +1109,7 @@ static void* GetThreadVarsSectionAddress() #ifdef TARGET_AMD64 extern "C" void* GetTlsIndexObjectDescOffset(); +extern "C" void* GetRuntimeThreadLocalsTlsIndexObjectDescOffset(); static
void* GetThreadStaticDescriptor(uint8_t* p) { @@ -1144,6 +1146,7 @@ static void* GetTlsIndexObjectAddress() #elif !defined(TARGET_ANDROID) && defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) extern "C" size_t GetThreadStaticsVariableOffset(); +extern "C" size_t GetRuntimeThreadLocalsVariableOffset(); #endif // !TARGET_ANDROID && TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64 #endif // TARGET_WINDOWS @@ -1192,6 +1195,47 @@ void GetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) pInfo->offsetOfBaseOfThreadLocalData = (uint32_t)threadStaticBaseOffset; #endif // !TARGET_ANDROID } + +/*********************************************************************/ +// Returns TLS access information for t_runtime_thread_locals (the +// thread-local allocation context) so the JIT can inline object allocation. +void GetObjectAllocContextTlsInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo) +{ +#if !defined(TARGET_ANDROID) + STANDARD_VM_CONTRACT; + +#if defined(TARGET_WINDOWS) + pInfo->tlsIndex.addr = (void*)static_cast<uintptr_t>(_tls_index); + pInfo->tlsIndex.accessType = IAT_VALUE; + pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer); + pInfo->tlsRoot.addr = (void*)(uintptr_t)ThreadLocalOffset(&t_runtime_thread_locals); + pInfo->tlsRoot.accessType = IAT_VALUE; + +#elif defined(TARGET_APPLE) + // macOS (both x64 and ARM64) TLVP model not yet implemented in JIT codegen + pInfo->supported = false; + +#elif defined(TARGET_AMD64) + pInfo->tlsGetAddrFtnPtr = reinterpret_cast<void*>(&__tls_get_addr); + uint8_t* p = reinterpret_cast<uint8_t*>(&GetRuntimeThreadLocalsTlsIndexObjectDescOffset); + pInfo->tlsRoot.addr = GetThreadStaticDescriptor(p); + pInfo->tlsRoot.accessType = IAT_VALUE; + if (pInfo->tlsRoot.addr == nullptr) + { + pInfo->supported = false; + } + +#elif defined(TARGET_ARM64) + pInfo->tlsRootOffset = GetRuntimeThreadLocalsVariableOffset(); + +#else + pInfo->supported = false; +#endif // TARGET_WINDOWS + +#else //
TARGET_ANDROID + pInfo->supported = false; +#endif // !TARGET_ANDROID +} #endif // !DACCESS_COMPILE #ifdef DACCESS_COMPILE diff --git a/src/coreclr/vm/threadstatics.h b/src/coreclr/vm/threadstatics.h index afbb3f25039031..c12d423350349d 100644 --- a/src/coreclr/vm/threadstatics.h +++ b/src/coreclr/vm/threadstatics.h @@ -347,6 +347,7 @@ void GetTLSIndexForThreadStatic(MethodTable* pMT, bool gcStatic, TLSIndex* pInde void FreeTLSIndicesForLoaderAllocator(LoaderAllocator *pLoaderAllocator); void* GetThreadLocalStaticBase(TLSIndex index); void GetThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo); +void GetObjectAllocContextTlsInfo(CORINFO_OBJECT_ALLOC_CONTEXT_INFO* pInfo); bool CanJITOptimizeTLSAccess(); #else void EnumThreadMemoryRegions(ThreadLocalData* pThreadLocalData, CLRDataEnumMemoryFlags flags);