Skip to content

[GSD-12492] DEVICE_LOST: OpConvertSToF/OpConvertUToF i32→double hangs igpu under fp64 emulation #907

@pvelesko

Description

@pvelesko

Summary

OpConvertSToF %double %uint and OpConvertUToF %double %uint (i32→f64 conversions) cause ZE_RESULT_ERROR_DEVICE_LOST on Intel UHD integrated GPUs when fp64 emulation is enabled via the environment variables IGC_EnableDPEmulation=1 and OverrideDefaultFP64Settings=1.

The same kernel works correctly on discrete GPUs (Arc A770, Arc A380) which have native fp64.

Environment

  • GPUs tested: Intel UHD Graphics 770 (Raptor Lake), Intel UHD Graphics 730 (Alder Lake) — both reproduce
  • GPUs that work: Intel Arc A770, Intel Arc A380 (native fp64)
  • Driver: compute-runtime latest
  • API: Level Zero

Minimal reproducer

Three files. The SPIR-V kernel does nothing but convert two i32 values to f64 and store them via Generic pointers.

kernel.spvasm

               ; Kernel "int2double": converts two 32-bit integer arguments to double
               ; (one with OpConvertSToF, one with OpConvertUToF) and stores each
               ; result through a Generic pointer.
               ;
               ; Float64, Int64 and Int8 are declared for the %double, %ulong and
               ; %uchar types below; GenericPointer for the Generic storage class.
               OpCapability Addresses
               OpCapability Kernel
               OpCapability Float64
               OpCapability Int64
               OpCapability GenericPointer
               OpCapability Int8
          %1 = OpExtInstImport "OpenCL.std"
               OpMemoryModel Physical64 OpenCL
               OpEntryPoint Kernel %main "int2double"
      %ulong = OpTypeInt 64 0
      %uchar = OpTypeInt 8 0
       %uint = OpTypeInt 32 0
       %void = OpTypeVoid
     %double = OpTypeFloat 64
    %ptr_cwg = OpTypePointer CrossWorkgroup %uchar
    %ptr_gen = OpTypePointer Generic %double
               ; Signature: (uint x_int, uint x_uint, global uchar* out_a, global uchar* out_b)
      %ftype = OpTypeFunction %void %uint %uint %ptr_cwg %ptr_cwg
       %main = OpFunction %void None %ftype
      %x_int = OpFunctionParameter %uint
     %x_uint = OpFunctionParameter %uint
      %out_a = OpFunctionParameter %ptr_cwg
      %out_b = OpFunctionParameter %ptr_cwg
      %entry = OpLabel
               ; Re-type each CrossWorkgroup uchar pointer as a Generic double pointer
               ; by round-tripping it through a 64-bit integer.
         %pa = OpConvertPtrToU %ulong %out_a
         %ga = OpConvertUToPtr %ptr_gen %pa
         %pb = OpConvertPtrToU %ulong %out_b
         %gb = OpConvertUToPtr %ptr_gen %pb
               ; Signed i32 -> f64 conversion of the first argument, stored to out_a.
         %ca = OpConvertSToF %double %x_int
               OpStore %ga %ca Aligned 8
               ; Unsigned i32 -> f64 conversion of the second argument, stored to out_b.
         %cb = OpConvertUToF %double %x_uint
               OpStore %gb %cb Aligned 8
               OpReturn
               OpFunctionEnd

main.cpp

#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include <level_zero/ze_api.h>
// Evaluates the expression `x` once. If the result is non-zero, writes the
// expression text and the result in hexadecimal to stderr and returns 1 from
// the enclosing function; otherwise execution continues after the macro.
#define ZE(x)                                                        \
  do {                                                               \
    if (auto zeStatus = (x)) {                                       \
      std::cerr << #x << ":0x" << std::hex << zeStatus << "\n";      \
      return 1;                                                      \
    }                                                                \
  } while (0)
int main(int argc,char**argv){
  int ti=argc>1?atoi(argv[1]):-1;
  ZE(zeInit(ZE_INIT_FLAG_GPU_ONLY));
  uint32_t dc=0;ZE(zeDriverGet(&dc,nullptr));
  std::vector<ze_driver_handle_t>drvs(dc);ZE(zeDriverGet(&dc,drvs.data()));
  std::vector<std::pair<ze_driver_handle_t,ze_device_handle_t>>devs;
  for(auto d:drvs){uint32_t n=0;ZE(zeDeviceGet(d,&n,nullptr));
    std::vector<ze_device_handle_t>dd(n);ZE(zeDeviceGet(d,&n,dd.data()));
    for(auto x:dd)devs.push_back({d,x});}
  for(size_t i=0;i<devs.size();i++){ze_device_properties_t p{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    zeDeviceGetProperties(devs[i].second,&p);std::cout<<"["<<i<<"] "<<p.name<<"\n";}
  if(ti<0||ti>=(int)devs.size()){std::cerr<<"Usage: "<<argv[0]<<" <idx>\n";return 1;}
  auto[drv,dev]=devs[ti];
  ze_context_desc_t cd{ZE_STRUCTURE_TYPE_CONTEXT_DESC};ze_context_handle_t ctx;
  ZE(zeContextCreate(drv,&cd,&ctx));
  ze_command_queue_desc_t cqd{};
  ze_command_list_handle_t cl;ZE(zeCommandListCreateImmediate(ctx,dev,&cqd,&cl));
  ze_event_pool_desc_t epd{ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,nullptr,ZE_EVENT_POOL_FLAG_HOST_VISIBLE,1};
  ze_event_pool_handle_t ep;ZE(zeEventPoolCreate(ctx,&epd,0,nullptr,&ep));
  ze_event_desc_t ed{ZE_STRUCTURE_TYPE_EVENT_DESC,nullptr,0,ZE_EVENT_SCOPE_FLAG_HOST,ZE_EVENT_SCOPE_FLAG_HOST};
  ze_event_handle_t ev;ZE(zeEventCreate(ep,&ed,&ev));
  std::ifstream f("kernel.spv",std::ios::binary|std::ios::ate);
  auto sz=f.tellg();f.seekg(0);std::vector<char>spv(sz);f.read(spv.data(),sz);f.close();
  ze_module_desc_t md{};md.format=ZE_MODULE_FORMAT_IL_SPIRV;
  md.pInputModule=(const uint8_t*)spv.data();md.inputSize=sz;
  ze_module_handle_t mod;ze_module_build_log_handle_t bl;
  if(zeModuleCreate(ctx,dev,&md,&mod,&bl)){size_t n=0;zeModuleBuildLogGetString(bl,&n,nullptr);
    std::string l(n,0);zeModuleBuildLogGetString(bl,&n,l.data());
    std::cerr<<"Build:"<<l<<"\n";return 1;}
  ze_device_mem_alloc_desc_t da{ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
  ze_host_mem_alloc_desc_t ha{ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
  double*o1,*o2;
  ZE(zeMemAllocShared(ctx,&da,&ha,8,1,dev,(void**)&o1));
  ZE(zeMemAllocShared(ctx,&da,&ha,8,1,dev,(void**)&o2));
  ze_kernel_handle_t k;ze_kernel_desc_t kd{};kd.pKernelName="int2double";
  ZE(zeKernelCreate(mod,&kd,&k));ZE(zeKernelSetGroupSize(k,1,1,1));
  uint32_t iv=uint32_t(-7),uv=42;
  ZE(zeKernelSetArgumentValue(k,0,4,&iv));ZE(zeKernelSetArgumentValue(k,1,4,&uv));
  ZE(zeKernelSetArgumentValue(k,2,8,&o1));ZE(zeKernelSetArgumentValue(k,3,8,&o2));
  ze_group_count_t gc{1,1,1};
  ZE(zeCommandListAppendLaunchKernel(cl,k,&gc,ev,0,nullptr));
  auto r=zeEventHostSynchronize(ev,UINT64_MAX);
  if(r){std::cerr<<"DEVICE_LOST\n";return 1;}
  std::cout<<"results: "<<*o1<<", "<<*o2<<"\n";
  std::cout<<(*o1==-7.0&&*o2==42.0?"PASSED":"FAILED")<<"\n";
}

Makefile

# Builds the Level Zero host program and assembles the SPIR-V kernel.
# `all` and `clean` name actions, not files, so they are declared phony:
# a stray file called "all" or "clean" must not disable them.
.PHONY: all clean

all: driver kernel.spv

# Host program, linked against the Level Zero loader.
driver: main.cpp
	g++ -g -std=c++17 -o $@ $< -lze_loader

# Binary SPIR-V module assembled from the textual kernel.
kernel.spv: kernel.spvasm
	spirv-as $< -o $@

clean:
	rm -f driver kernel.spv

Steps to reproduce

make
# Pass device index for igpu (shown in device listing)
IGC_EnableDPEmulation=1 OverrideDefaultFP64Settings=1 ./driver <igpu_index>

Expected result

results: -7, 42
PASSED

Actual result (igpu)

DEVICE_LOST

Notes

  • The kernel is valid SPIR-V (passes spirv-val)
  • Discrete GPUs with native fp64 run this kernel correctly
  • Other fp64 emulated operations work on igpu (e.g. OpConvertSToF i64→f64, OpFConvert f64→f32)
  • The specific failing operations are OpConvertSToF %double %uint and OpConvertUToF %double %uint (i32→f64)
  • Discovered while investigating chipStar test failures on igpu

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions