Skip to content

DEVICE_LOST: OpConvertSToF/OpConvertUToF i32→double hangs igpu under fp64 emulation #907

@pvelesko

Description

@pvelesko

Summary

OpConvertSToF %double %uint and OpConvertUToF %double %uint (i32→f64 conversions, where %uint is the 32-bit integer type declared in the kernel below) cause ZE_RESULT_ERROR_DEVICE_LOST on Intel UHD integrated GPUs when fp64 emulation is enabled by setting the environment variables IGC_EnableDPEmulation=1 and OverrideDefaultFP64Settings=1.

The same kernel works correctly on discrete GPUs (Arc A770, Arc A380) which have native fp64.

Environment

  • GPUs tested: Intel UHD Graphics 770 (Raptor Lake), Intel UHD Graphics 730 (Alder Lake) — both reproduce
  • GPUs that work: Intel Arc A770, Intel Arc A380 (native fp64)
  • Driver: compute-runtime latest
  • API: Level Zero

Minimal reproducer

Three files. The SPIR-V kernel does nothing but convert two i32 values to f64 and store them via Generic pointers.

kernel.spvasm

               ; Reproducer kernel "int2double": converts two 32-bit integer
               ; arguments to f64 (one via OpConvertSToF, one via OpConvertUToF)
               ; and stores each result through a Generic pointer.
               OpCapability Addresses
               OpCapability Kernel
               OpCapability Float64
               OpCapability Int64
               OpCapability GenericPointer
               OpCapability Int8
          %1 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %main "int2double"
               ; Scalar types. All integer types are declared with signedness 0.
      %ulong = OpTypeInt 64 0
      %uchar = OpTypeInt 8 0
       %uint = OpTypeInt 32 0
       %void = OpTypeVoid
     %double = OpTypeFloat 64
               ; Output buffers arrive as CrossWorkgroup byte pointers; stores
               ; go through Generic pointers to double.
    %ptr_cwg = OpTypePointer CrossWorkgroup %uchar
    %ptr_gen = OpTypePointer Generic %double
               ; Kernel signature: (uint x_int, uint x_uint, uchar* out_a, uchar* out_b).
      %ftype = OpTypeFunction %void %uint %uint %ptr_cwg %ptr_cwg
       %main = OpFunction %void None %ftype
      %x_int = OpFunctionParameter %uint
     %x_uint = OpFunctionParameter %uint
      %out_a = OpFunctionParameter %ptr_cwg
      %out_b = OpFunctionParameter %ptr_cwg
      %entry = OpLabel
               ; Re-type each output pointer as Generic double* by round-tripping
               ; it through a 64-bit integer.
         %pa = OpConvertPtrToU %ulong %out_a
         %ga = OpConvertUToPtr %ptr_gen %pa
         %pb = OpConvertPtrToU %ulong %out_b
         %gb = OpConvertUToPtr %ptr_gen %pb
               ; Signed i32 -> f64 conversion of the first argument, stored to out_a.
         %ca = OpConvertSToF %double %x_int
               OpStore %ga %ca Aligned 8
               ; Unsigned i32 -> f64 conversion of the second argument, stored to out_b.
         %cb = OpConvertUToF %double %x_uint
               OpStore %gb %cb Aligned 8
               OpReturn
               OpFunctionEnd

main.cpp

#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include <level_zero/ze_api.h>
// Evaluates `call` once. If the result is nonzero, writes the call's source
// text followed by ":0x" and the result in hex to stderr, then returns 1 from
// the enclosing function. Only usable inside functions returning int.
#define ZE(call)                                                         \
  do {                                                                   \
    if (auto ze_status_ = (call)) {                                      \
      std::cerr << #call << ":0x" << std::hex << ze_status_ << "\n";     \
      return 1;                                                          \
    }                                                                    \
  } while (0)
int main(int argc,char**argv){
  int ti=argc>1?atoi(argv[1]):-1;
  ZE(zeInit(ZE_INIT_FLAG_GPU_ONLY));
  uint32_t dc=0;ZE(zeDriverGet(&dc,nullptr));
  std::vector<ze_driver_handle_t>drvs(dc);ZE(zeDriverGet(&dc,drvs.data()));
  std::vector<std::pair<ze_driver_handle_t,ze_device_handle_t>>devs;
  for(auto d:drvs){uint32_t n=0;ZE(zeDeviceGet(d,&n,nullptr));
    std::vector<ze_device_handle_t>dd(n);ZE(zeDeviceGet(d,&n,dd.data()));
    for(auto x:dd)devs.push_back({d,x});}
  for(size_t i=0;i<devs.size();i++){ze_device_properties_t p{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    zeDeviceGetProperties(devs[i].second,&p);std::cout<<"["<<i<<"] "<<p.name<<"\n";}
  if(ti<0||ti>=(int)devs.size()){std::cerr<<"Usage: "<<argv[0]<<" <idx>\n";return 1;}
  auto[drv,dev]=devs[ti];
  ze_context_desc_t cd{ZE_STRUCTURE_TYPE_CONTEXT_DESC};ze_context_handle_t ctx;
  ZE(zeContextCreate(drv,&cd,&ctx));
  ze_command_queue_desc_t cqd{};
  ze_command_list_handle_t cl;ZE(zeCommandListCreateImmediate(ctx,dev,&cqd,&cl));
  ze_event_pool_desc_t epd{ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,nullptr,ZE_EVENT_POOL_FLAG_HOST_VISIBLE,1};
  ze_event_pool_handle_t ep;ZE(zeEventPoolCreate(ctx,&epd,0,nullptr,&ep));
  ze_event_desc_t ed{ZE_STRUCTURE_TYPE_EVENT_DESC,nullptr,0,ZE_EVENT_SCOPE_FLAG_HOST,ZE_EVENT_SCOPE_FLAG_HOST};
  ze_event_handle_t ev;ZE(zeEventCreate(ep,&ed,&ev));
  std::ifstream f("kernel.spv",std::ios::binary|std::ios::ate);
  auto sz=f.tellg();f.seekg(0);std::vector<char>spv(sz);f.read(spv.data(),sz);f.close();
  ze_module_desc_t md{};md.format=ZE_MODULE_FORMAT_IL_SPIRV;
  md.pInputModule=(const uint8_t*)spv.data();md.inputSize=sz;
  ze_module_handle_t mod;ze_module_build_log_handle_t bl;
  if(zeModuleCreate(ctx,dev,&md,&mod,&bl)){size_t n=0;zeModuleBuildLogGetString(bl,&n,nullptr);
    std::string l(n,0);zeModuleBuildLogGetString(bl,&n,l.data());
    std::cerr<<"Build:"<<l<<"\n";return 1;}
  ze_device_mem_alloc_desc_t da{ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
  ze_host_mem_alloc_desc_t ha{ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
  double*o1,*o2;
  ZE(zeMemAllocShared(ctx,&da,&ha,8,1,dev,(void**)&o1));
  ZE(zeMemAllocShared(ctx,&da,&ha,8,1,dev,(void**)&o2));
  ze_kernel_handle_t k;ze_kernel_desc_t kd{};kd.pKernelName="int2double";
  ZE(zeKernelCreate(mod,&kd,&k));ZE(zeKernelSetGroupSize(k,1,1,1));
  uint32_t iv=uint32_t(-7),uv=42;
  ZE(zeKernelSetArgumentValue(k,0,4,&iv));ZE(zeKernelSetArgumentValue(k,1,4,&uv));
  ZE(zeKernelSetArgumentValue(k,2,8,&o1));ZE(zeKernelSetArgumentValue(k,3,8,&o2));
  ze_group_count_t gc{1,1,1};
  ZE(zeCommandListAppendLaunchKernel(cl,k,&gc,ev,0,nullptr));
  auto r=zeEventHostSynchronize(ev,UINT64_MAX);
  if(r){std::cerr<<"DEVICE_LOST\n";return 1;}
  std::cout<<"results: "<<*o1<<", "<<*o2<<"\n";
  std::cout<<(*o1==-7.0&&*o2==42.0?"PASSED":"FAILED")<<"\n";
}

Makefile

# Builds the reproducer: the host program `driver` and the binary SPIR-V
# module `kernel.spv`. `all` and `clean` are declared phony so that a stray
# file with either name cannot suppress the target.
.PHONY: all clean

all: driver kernel.spv

# Host program, linked against the Level Zero loader.
driver: main.cpp
	g++ -g -std=c++17 -o $@ $< -lze_loader

# Assemble the textual SPIR-V into a binary module.
kernel.spv: kernel.spvasm
	spirv-as $< -o $@

clean:
	rm -f driver kernel.spv

Steps to reproduce

make
# Pass device index for igpu (shown in device listing)
IGC_EnableDPEmulation=1 OverrideDefaultFP64Settings=1 ./driver <igpu_index>

Expected result

results: -7, 42
PASSED

Actual result (igpu)

DEVICE_LOST

Notes

  • The kernel is valid SPIR-V (passes spirv-val)
  • Discrete GPUs with native fp64 run this kernel correctly
  • Other fp64 emulated operations work on igpu (e.g. OpConvertSToF i64→f64, OpFConvert f64→f32)
  • The specific failing operations are OpConvertSToF %double %uint and OpConvertUToF %double %uint (i32→f64)
  • Discovered while investigating chipStar test failures on igpu

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions