Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.

Commit f734985

Browse files
committed
Update gpurt from commit 55ac0f61
Add Navi48 support
1 parent d142df3 commit f734985

File tree

105 files changed

+26374
-19
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+26374
-19
lines changed

CMakeLists.txt

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
##
22
#######################################################################################################################
33
#
4-
# Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved.
4+
# Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved.
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a copy
77
# of this software and associated documentation files (the "Software"), to deal
@@ -85,6 +85,31 @@ target_link_libraries(gpurt_internal PUBLIC pal)
8585
# Default PAL variables are not visible here, but sometimes they are set explicitly by the user or build configuration.
8686
# Implicitly enable the related RTIPs in this case.
8787

88+
#if PAL_BUILD_GFX12
89+
if (PAL_BUILD_GFX12)
90+
set(GPURT_BUILD_RTIP3_1 ON)
91+
endif()
92+
#endif
93+
94+
#if GPURT_BUILD_RTIP3_1
95+
# RTIP3.1 depends on RTIP3, so enabling 3.1 implicitly enables 3
96+
if (GPURT_BUILD_RTIP3_1)
97+
set(GPURT_BUILD_RTIP3 ON)
98+
endif()
99+
#endif
100+
101+
#if GPURT_BUILD_RTIP3
102+
if (GPURT_BUILD_RTIP3)
103+
gpurt_add_compile_definitions(GPURT_BUILD_RTIP3=1)
104+
endif()
105+
#endif
106+
107+
#if GPURT_BUILD_RTIP3_1
108+
if (GPURT_BUILD_RTIP3_1)
109+
gpurt_add_compile_definitions(GPURT_BUILD_RTIP3_1=1)
110+
endif()
111+
#endif
112+
88113
# Enable gpu developer mode if the client wants it.
89114
if (GPURT_DEVELOPER_MODE)
90115
gpurt_add_compile_definitions(GPURT_DEVELOPER=1)

backends/pal/gpurtPalBackend.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
***********************************************************************************************************************
33
*
4-
* Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved.
4+
* Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved.
55
*
66
* Permission is hereby granted, free of charge, to any person obtaining a copy
77
* of this software and associated documentation files (the "Software"), to deal
@@ -239,6 +239,10 @@ void PalBackend::InsertBarrier(
239239

240240
if (syncPreCpWrite)
241241
{
242+
#if PAL_BUILD_GFX12
243+
// Clients are expected to wait at PipelineStagePostPrefetch for API-level AS-related barrier operations.
244+
// However, the CoherCp access transition (GL2 flush on GFX12) is deferred until GPURT requires it.
245+
#endif
242246
memoryBarrier.srcStageMask |= Pal::PipelineStagePostPrefetch;
243247
memoryBarrier.srcAccessMask |= Pal::CoherShader;
244248
memoryBarrier.dstStageMask |= Pal::PipelineStagePostPrefetch;

backends/pal/gpurtPalBridge.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
***********************************************************************************************************************
33
*
4-
* Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved.
4+
* Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved.
55
*
66
* Permission is hereby granted, free of charge, to any person obtaining a copy
77
* of this software and associated documentation files (the "Software"), to deal
@@ -104,6 +104,9 @@ PipelineShaderCode GPURT_API_ENTRY GetShaderLibraryCode(
104104
// =====================================================================================================================
105105
Pal::Result GPURT_API_ENTRY QueryRayTracingEntryFunctionTable(
106106
const Pal::RayTracingIpLevel rayTracingIpLevel,
107+
#if GPURT_BUILD_RTIP3
108+
bool bvh8Enable,
109+
#endif
107110
EntryFunctionTable* const pEntryFunctionTable)
108111
{
109112
#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION < 40
@@ -112,6 +115,9 @@ Pal::Result GPURT_API_ENTRY QueryRayTracingEntryFunctionTable(
112115

113116
return Internal::QueryRayTracingEntryFunctionTable(
114117
rayTracingIpLevel,
118+
#if GPURT_BUILD_RTIP3
119+
bvh8Enable,
120+
#endif
115121
pEntryFunctionTable
116122
);
117123
}

gpurt/gpurt.h

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,18 @@ enum class StaticPipelineFlag : uint32
9090
EnableTraversalCounter = (1u << 28), // Enable Traversal counters
9191
Reserved = (1u << 27),
9292
EnableFusedInstanceNodes = (1u << 26), // Enable fused instance nodes
93+
#if GPURT_BUILD_RTIP3
94+
BvhHighPrecisionBoxNodeEnabled = (1u << 25), // Enable HP64 box node format
95+
Bvh8Enabled = (1u << 24), // Enable BVH8
96+
#else
9397
Reserved2 = (1u << 25),
9498
Reserved3 = (1u << 24),
99+
#endif
100+
#if GPURT_BUILD_RTIP3_1
101+
EnableOrientedBoundingBoxes = (1u << 23),
102+
#else
95103
Reserved4 = (1u << 23),
104+
#endif
96105
Reserved5 = (1u << 22),
97106
#if GPURT_ENABLE_GPU_DEBUG
98107
DebugAssertsHalt = (1u << 21),
@@ -113,6 +122,16 @@ constexpr size_t RayTracingBVHNodeSize = 64;
113122
// Byte size of a BVH4 node, each AABB encoded using 32bit floats
114123
constexpr size_t RayTracingQBVH32NodeSize = 128;
115124

125+
#if GPURT_BUILD_RTIP3
126+
// Byte size of a high precision box node
127+
constexpr size_t RayTracingHighPrecisionBoxNodeSize = 64;
128+
129+
#if GPURT_BUILD_RTIP3_1
130+
// Byte size of a quantized BVH8 box node
131+
constexpr size_t RayTracingQuantizedBVH8BoxNodeSize = 128;
132+
#endif
133+
#endif
134+
116135
// Byte size of a BVH4 node, each AABB encoded using 16bit floats
117136
constexpr size_t RayTracingQBVH16NodeSize = 64;
118137

@@ -325,8 +344,28 @@ enum class InternalRayTracingCsType : uint32
325344
UpdateAabbs,
326345
InitAccelerationStructure,
327346
InitUpdateAccelerationStructure,
347+
#if GPURT_BUILD_RTIP3_1
348+
RefitOrientedBounds,
349+
RefitOrientedBoundsTopLevel,
350+
CompressPrims,
351+
#endif
352+
#if GPURT_BUILD_RTIP3|| GPURT_BUILD_RTIP3_1
353+
BuildParallelRtip3x,
354+
#endif
328355
BuildFastAgglomerativeLbvh,
329356
EncodeQuadNodes,
357+
#if GPURT_BUILD_RTIP3_1
358+
BuildTrivialBvh,
359+
BuildSingleThreadGroup32,
360+
BuildSingleThreadGroup64,
361+
BuildSingleThreadGroup128,
362+
BuildSingleThreadGroup256,
363+
BuildSingleThreadGroup512,
364+
BuildSingleThreadGroup1024,
365+
EncodeHwBvh3_1,
366+
Update3_1,
367+
RefitInstanceBounds,
368+
#endif
330369
Count
331370
};
332371

@@ -744,6 +783,10 @@ struct DeviceSettings
744783
uint32 rebraidFactor; // Rebraid factor
745784
uint32 numRebraidIterations;
746785
uint32 rebraidQualityHeuristic;
786+
#if GPURT_BUILD_RTIP3_1
787+
float rebraidOpenSAFactor;
788+
uint32 rebraidOpenMinPrims;
789+
#endif
747790

748791
uint32 plocRadius; // PLOC nearest neighbor search radius
749792
#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION < 54
@@ -794,6 +837,10 @@ struct DeviceSettings
794837
#endif
795838
uint32 allowFp16BoxNodesInUpdatableBvh : 1; // Allow box node in updatable bvh.
796839
uint32 fp16BoxNodesRequireCompaction : 1; // Compaction is set or not.
840+
#if GPURT_BUILD_RTIP3
841+
uint32 highPrecisionBoxNodeEnable : 1; // High precision box node enable
842+
uint32 bvh8Enable : 1; // Enable BVH8 box nodes
843+
#endif
797844
#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION < 43
798845
uint32 enableSAHCost : 1; // Use more accurate SAH cost
799846
#endif
@@ -806,6 +853,10 @@ struct DeviceSettings
806853
#endif
807854

808855
uint32 enableRemapScratchBuffer : 1; // Enable remapping bvh2 data from ScratchBuffer to ResultBuffer
856+
#if GPURT_BUILD_RTIP3_1
857+
uint32 enableBvhChannelBalancing : 1; // Balance memory channels by adding variable padding to the BVH
858+
uint32 enableSingleThreadGroupBuild : 1; // Enable single thread group builder
859+
#endif
809860
uint32 checkBufferOverlapsInBatch : 1;
810861
uint32 disableCompaction : 1; // Reports and performs a copy instead of compaction
811862
uint32 disableDegenPrims : 1; // Disable degenerate primitives, ie: set their vertex.x = NaN
@@ -826,6 +877,25 @@ struct DeviceSettings
826877

827878
uint32 gpuDebugFlags;
828879

880+
#if GPURT_BUILD_RTIP3_1
881+
uint8 trivialBuilderMaxPrimThreshold; // Max number of prims the trivial builder should
882+
// run on. Supports at most 16.
883+
// A value of 0 implies the builder is disabled.
884+
#endif
885+
#if GPURT_BUILD_RTIP3_1
886+
uint32 primCompressionFlags; // Debug flags for RTIP3.1+ primitive compression
887+
uint32 maxPrimRangeSize; // Maximum number of triangles in a prim range
888+
uint32 enableOrientedBoundingBoxes; // Enable oriented bounding box traversal and build
889+
uint32 tlasRefittingMode; // Enable TLAS leaf node refitting
890+
#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION < 49
891+
uint32 obbQuality; // Controls our OBB builder's K-DOP quality
892+
#else
893+
uint32 obbNumLevels; // Controls how many BVH levels are converted to OBBs
894+
uint32 obbDisableBuildFlags; // Controls which acceleration structures are affected by the OBB build pass
895+
#endif
896+
uint32 instanceMode; // Intersectable instance node mode (See InstanceMode)
897+
uint32 boxSplittingFlags; // Box splitting / Child reuse flags
898+
#endif
829899
#if GPURT_DEVELOPER
830900
RgpMarkerGranularityFlags rgpMarkerGranularityFlags;
831901
#endif
@@ -1455,6 +1525,9 @@ PipelineShaderCode GPURT_API_ENTRY GetShaderLibraryCode(
14551525
// @return whether the function table was found successfully
14561526
Pal::Result GPURT_API_ENTRY QueryRayTracingEntryFunctionTable(
14571527
const Pal::RayTracingIpLevel rayTracingIpLevel,
1528+
#if GPURT_BUILD_RTIP3
1529+
bool bvh8Enable,
1530+
#endif
14581531
EntryFunctionTable* const pEntryFunctionTable);
14591532

14601533
// =====================================================================================================================

0 commit comments

Comments
 (0)