-
Notifications
You must be signed in to change notification settings - Fork 269
Open
Description
Summary
OpConvertSToF %double %uint and OpConvertUToF %double %uint (i32→f64 conversion) cause ZE_RESULT_ERROR_DEVICE_LOST on Intel UHD integrated GPUs when fp64 emulation is enabled via the environment variables IGC_EnableDPEmulation=1 and OverrideDefaultFP64Settings=1.
The same kernel works correctly on discrete GPUs (Arc A770, Arc A380) which have native fp64.
Environment
- GPUs tested: Intel UHD Graphics 770 (Raptor Lake), Intel UHD Graphics 730 (Alder Lake) — both reproduce
- GPUs that work: Intel Arc A770, Intel Arc A380 (native fp64)
- Driver: compute-runtime latest
- API: Level Zero
Minimal reproducer
Three files. The SPIR-V kernel does nothing but convert two i32 values to f64 and store them via Generic pointers.
kernel.spvasm
; Minimal OpenCL-flavoured SPIR-V module with a single kernel, "int2double".
; The kernel converts its two 32-bit integer arguments to f64 (one signed
; conversion, one unsigned conversion) and stores each result through a
; Generic pointer derived from the corresponding CrossWorkgroup argument.
OpCapability Addresses
OpCapability Kernel
OpCapability Float64
OpCapability Int64
OpCapability GenericPointer
OpCapability Int8
%1 = OpExtInstImport "OpenCL.std"
OpMemoryModel Physical64 OpenCL
OpEntryPoint Kernel %main "int2double"
; --- Types ---
%ulong = OpTypeInt 64 0
%uchar = OpTypeInt 8 0
%uint = OpTypeInt 32 0
%void = OpTypeVoid
%double = OpTypeFloat 64
%ptr_cwg = OpTypePointer CrossWorkgroup %uchar
%ptr_gen = OpTypePointer Generic %double
; Kernel signature: (uint, uint, CrossWorkgroup uchar*, CrossWorkgroup uchar*)
%ftype = OpTypeFunction %void %uint %uint %ptr_cwg %ptr_cwg
%main = OpFunction %void None %ftype
%x_int = OpFunctionParameter %uint
%x_uint = OpFunctionParameter %uint
%out_a = OpFunctionParameter %ptr_cwg
%out_b = OpFunctionParameter %ptr_cwg
%entry = OpLabel
; Re-type each output pointer as a Generic double* by round-tripping it
; through a 64-bit integer.
%pa = OpConvertPtrToU %ulong %out_a
%ga = OpConvertUToPtr %ptr_gen %pa
%pb = OpConvertPtrToU %ulong %out_b
%gb = OpConvertUToPtr %ptr_gen %pb
; Signed i32 -> f64 conversion of the first argument, stored to the first output.
%ca = OpConvertSToF %double %x_int
OpStore %ga %ca Aligned 8
; Unsigned i32 -> f64 conversion of the second argument, stored to the second output.
%cb = OpConvertUToF %double %x_uint
OpStore %gb %cb Aligned 8
OpReturn
OpFunctionEnd
main.cpp
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include <level_zero/ze_api.h>
// Evaluates the expression x once; if the result is non-zero, prints the
// expression text and the result in hex to stderr and returns 1 from the
// enclosing function. Only usable inside a function returning int.
#define ZE(x) do{if(auto r=(x)){std::cerr<<#x<<":0x"<<std::hex<<r<<"\n";return 1;}}while(0)
int main(int argc,char**argv){
int ti=argc>1?atoi(argv[1]):-1;
ZE(zeInit(ZE_INIT_FLAG_GPU_ONLY));
uint32_t dc=0;ZE(zeDriverGet(&dc,nullptr));
std::vector<ze_driver_handle_t>drvs(dc);ZE(zeDriverGet(&dc,drvs.data()));
std::vector<std::pair<ze_driver_handle_t,ze_device_handle_t>>devs;
for(auto d:drvs){uint32_t n=0;ZE(zeDeviceGet(d,&n,nullptr));
std::vector<ze_device_handle_t>dd(n);ZE(zeDeviceGet(d,&n,dd.data()));
for(auto x:dd)devs.push_back({d,x});}
for(size_t i=0;i<devs.size();i++){ze_device_properties_t p{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
zeDeviceGetProperties(devs[i].second,&p);std::cout<<"["<<i<<"] "<<p.name<<"\n";}
if(ti<0||ti>=(int)devs.size()){std::cerr<<"Usage: "<<argv[0]<<" <idx>\n";return 1;}
auto[drv,dev]=devs[ti];
ze_context_desc_t cd{ZE_STRUCTURE_TYPE_CONTEXT_DESC};ze_context_handle_t ctx;
ZE(zeContextCreate(drv,&cd,&ctx));
ze_command_queue_desc_t cqd{};
ze_command_list_handle_t cl;ZE(zeCommandListCreateImmediate(ctx,dev,&cqd,&cl));
ze_event_pool_desc_t epd{ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,nullptr,ZE_EVENT_POOL_FLAG_HOST_VISIBLE,1};
ze_event_pool_handle_t ep;ZE(zeEventPoolCreate(ctx,&epd,0,nullptr,&ep));
ze_event_desc_t ed{ZE_STRUCTURE_TYPE_EVENT_DESC,nullptr,0,ZE_EVENT_SCOPE_FLAG_HOST,ZE_EVENT_SCOPE_FLAG_HOST};
ze_event_handle_t ev;ZE(zeEventCreate(ep,&ed,&ev));
std::ifstream f("kernel.spv",std::ios::binary|std::ios::ate);
auto sz=f.tellg();f.seekg(0);std::vector<char>spv(sz);f.read(spv.data(),sz);f.close();
ze_module_desc_t md{};md.format=ZE_MODULE_FORMAT_IL_SPIRV;
md.pInputModule=(const uint8_t*)spv.data();md.inputSize=sz;
ze_module_handle_t mod;ze_module_build_log_handle_t bl;
if(zeModuleCreate(ctx,dev,&md,&mod,&bl)){size_t n=0;zeModuleBuildLogGetString(bl,&n,nullptr);
std::string l(n,0);zeModuleBuildLogGetString(bl,&n,l.data());
std::cerr<<"Build:"<<l<<"\n";return 1;}
ze_device_mem_alloc_desc_t da{ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
ze_host_mem_alloc_desc_t ha{ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
double*o1,*o2;
ZE(zeMemAllocShared(ctx,&da,&ha,8,1,dev,(void**)&o1));
ZE(zeMemAllocShared(ctx,&da,&ha,8,1,dev,(void**)&o2));
ze_kernel_handle_t k;ze_kernel_desc_t kd{};kd.pKernelName="int2double";
ZE(zeKernelCreate(mod,&kd,&k));ZE(zeKernelSetGroupSize(k,1,1,1));
uint32_t iv=uint32_t(-7),uv=42;
ZE(zeKernelSetArgumentValue(k,0,4,&iv));ZE(zeKernelSetArgumentValue(k,1,4,&uv));
ZE(zeKernelSetArgumentValue(k,2,8,&o1));ZE(zeKernelSetArgumentValue(k,3,8,&o2));
ze_group_count_t gc{1,1,1};
ZE(zeCommandListAppendLaunchKernel(cl,k,&gc,ev,0,nullptr));
auto r=zeEventHostSynchronize(ev,UINT64_MAX);
if(r){std::cerr<<"DEVICE_LOST\n";return 1;}
std::cout<<"results: "<<*o1<<", "<<*o2<<"\n";
std::cout<<(*o1==-7.0&&*o2==42.0?"PASSED":"FAILED")<<"\n";
}Makefile
# Builds the Level Zero reproducer (driver) and assembles the SPIR-V kernel.
# Recipe lines must begin with a TAB character.
.PHONY: all clean

all: driver kernel.spv

# Host program; links against the Level Zero loader.
driver: main.cpp
	g++ -g -std=c++17 -o $@ $< -lze_loader

# Assemble the textual SPIR-V into the binary module loaded by the driver.
kernel.spv: kernel.spvasm
	spirv-as $< -o $@

clean:
	rm -f driver kernel.spv
Steps to reproduce
make
# Pass device index for igpu (shown in device listing)
IGC_EnableDPEmulation=1 OverrideDefaultFP64Settings=1 ./driver <igpu_index>
Expected result
results: -7, 42
PASSED
Actual result (igpu)
DEVICE_LOST
Notes
- The kernel is valid SPIR-V (passes spirv-val)
- Discrete GPUs with native fp64 run this kernel correctly
- Other fp64 emulated operations work on igpu (e.g. OpConvertSToF i64→f64, OpFConvert f64→f32)
- The specific failing operations are OpConvertSToF %double %uint and OpConvertUToF %double %uint (i32→f64)
- Discovered while investigating chipStar test failures on igpu
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels