From 3ae5a6bf4a69b5b38623906b14451062f19c312b Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Tue, 28 Apr 2026 14:39:21 +0100 Subject: [PATCH 1/3] [SYCL] Fix SPIR-V type requirements for image operations The SPIR-V OpenCL environment spec requires OpImageRead, OpImageWrite, OpImageFetch, and OpImageSampleExplicitLod to use vec4 operands with 32-bit component types (or 16-bit for half). Channel sizes and narrow integer types (8-bit and 16-bit) must be widened to their vec4 32-bit equivalents for the SPIR-V instruction. All __invoke__Image* functions now use these helpers to ensure correct SPIR-V type generation. --- sycl/include/sycl/detail/image_ocl_types.hpp | 269 +++++++++++++---- .../check_device_code/image_spirv_types.cpp | 281 ++++++++++++++++++ 2 files changed, 495 insertions(+), 55 deletions(-) create mode 100644 sycl/test/check_device_code/image_spirv_types.cpp diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index 1880c7e520c54..dcdccb943943a 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -30,7 +30,6 @@ #include #include -#include #include @@ -41,24 +40,112 @@ namespace detail { // Type trait to get the associated sampled image type for a given image type. template struct sampled_opencl_image_type; -// The SPIR-V spec requires the result of OpImageSampleExplicitLod to be a -// vector type of four components. To satisfy this requirement, we need to use -// a temporary vector type to hold the result of the SPIR-V call, and then -// copy the result back to the original return type. The following type trait is -// used to get the temporary vector type based on the original return type. - -template struct image_sample_explicit_lod_result { - using type = sycl::vec; +// Helper to extract element type from scalar or OpenCL vector type +// (e.g., float or float __attribute__((ext_vector_type(4)))) +template struct get_image_element_type { using type = T; }; +template +struct get_image_element_type { + using type = T; }; +template +using get_image_element_type_t = typename get_image_element_type::type; -template -struct image_sample_explicit_lod_result> { - using type = sycl::vec; +// Helper to get number of elements (1 for scalar, N for vector) +template struct get_num_elements { + static constexpr int value = 1; +}; +template +struct get_num_elements { + static constexpr int value = N; }; +template +inline constexpr int get_num_elements_v = get_num_elements::value; + +// The OpenCL SPIR-V environment spec requires that OpImageRead, OpImageWrite, +// OpImageFetch, and OpImageSampleExplicitLod use vec4 operands with 32-bit +// component types, with the sole exception of half (_Float16) which may use +// 16-bit components. Channel sizes and narrow integer types (8-bit and 16-bit) +// must be widened to their vec4 32-bit equivalents. +template struct spirv_image_widened_elem_type { using type = T; }; +template <> struct spirv_image_widened_elem_type { using type = int32_t; }; +template <> struct spirv_image_widened_elem_type { using type = uint32_t; }; +template <> struct spirv_image_widened_elem_type { using type = int32_t; }; +template <> struct spirv_image_widened_elem_type { using type = uint32_t; }; +template +using spirv_image_widened_elem_type_t = typename spirv_image_widened_elem_type::type; + +// Compile-time verification of type widening rules. +static_assert(std::is_same_v, int32_t>); +static_assert(std::is_same_v, uint32_t>); +static_assert(std::is_same_v, int32_t>); +static_assert(std::is_same_v, uint32_t>); +static_assert(std::is_same_v, int32_t>); +static_assert(std::is_same_v, uint32_t>); +static_assert(std::is_same_v, float>); +static_assert(std::is_same_v, _Float16>); + +// Helper function to convert vec4 result to requested OpenCL type. +// Handles scalar, vec2, vec3, and vec4 return types. +template +static inline constexpr RequestedType convertVec4ToRequestedType(Vec4Type vec4Result) { + using ElemType = get_image_element_type_t; + constexpr int NumElements = get_num_elements_v; + + // Extract components based on RequestedType + if constexpr (NumElements == 1) { + return static_cast(vec4Result[0]); + } else if constexpr (NumElements == 2) { + using Vec2Type = ElemType __attribute__((ext_vector_type(2))); + Vec2Type result; + result[0] = static_cast(vec4Result[0]); + result[1] = static_cast(vec4Result[1]); + return result; + } else if constexpr (NumElements == 3) { + using Vec3Type = ElemType __attribute__((ext_vector_type(3))); + Vec3Type result; + result[0] = static_cast(vec4Result[0]); + result[1] = static_cast(vec4Result[1]); + result[2] = static_cast(vec4Result[2]); + return result; + } else { + static_assert(NumElements == 4, "Vector size must be 1, 2, 3, or 4"); + using Vec4NarrowType = ElemType __attribute__((ext_vector_type(4))); + Vec4NarrowType result; + result[0] = static_cast(vec4Result[0]); + result[1] = static_cast(vec4Result[1]); + result[2] = static_cast(vec4Result[2]); + result[3] = static_cast(vec4Result[3]); + return result; + } +} -template -using image_sample_explicit_lod_result_t = - typename image_sample_explicit_lod_result::type; +// Helper function to convert scalar or OpenCL vector value to vec4 for OpImageWrite. +template +static inline constexpr auto convertRequestedTypeToVec4(SourceType val) { + using RawElemType = get_image_element_type_t; + using ElemType = spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + constexpr int NumElements = get_num_elements_v; + + Vec4Type result{}; + if constexpr (NumElements == 1) { + result[0] = static_cast(val); + } else if constexpr (NumElements == 2) { + result[0] = static_cast(val[0]); + result[1] = static_cast(val[1]); + } else if constexpr (NumElements == 3) { + result[0] = static_cast(val[0]); + result[1] = static_cast(val[1]); + result[2] = static_cast(val[2]); + } else { + static_assert(NumElements == 4, "Vector size must be 1, 2, 3, or 4"); + result[0] = static_cast(val[0]); + result[1] = static_cast(val[1]); + result[2] = static_cast(val[2]); + result[3] = static_cast(val[3]); + } + return result; +} } // namespace detail } // namespace _V1 @@ -84,8 +171,11 @@ static void __invoke__ImageWrite(ImageT Img, CoordT Coords, ValT Val) { auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); auto TmpVal = sycl::detail::convertToOpenCLType(Val); - __spirv_ImageWrite( - Img, TmpCoords, TmpVal); + // SPIR-V spec requires OpImageWrite texel to be vec4. + auto vec4Val = sycl::detail::convertRequestedTypeToVec4(TmpVal); + + __spirv_ImageWrite( + Img, TmpCoords, vec4Val); } template @@ -95,8 +185,16 @@ static RetType __invoke__ImageRead(ImageT Img, CoordT Coords) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) vec4. + using RawElemType = sycl::detail::get_image_element_type_t; + using ElemType = sycl::detail::spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + + Vec4Type vec4Result = + __spirv_ImageRead(Img, TmpCoords); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageRead(Img, TmpCoords)); + sycl::detail::convertVec4ToRequestedType(vec4Result)); } template @@ -106,9 +204,17 @@ static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4. + using RawElemType = sycl::detail::get_image_element_type_t; + using ElemType = sycl::detail::spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + + Vec4Type vec4Result = + __spirv_ImageFetch(Img, + TmpCoords); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageFetch(Img, - TmpCoords)); + sycl::detail::convertVec4ToRequestedType(vec4Result)); } template @@ -118,9 +224,17 @@ static RetType __invoke__SampledImageFetch(ImageT Img, CoordT Coords) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4. + using RawElemType = sycl::detail::get_image_element_type_t; + using ElemType = sycl::detail::spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + + Vec4Type vec4Result = + __spirv_SampledImageFetch( + Img, TmpCoords); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_SampledImageFetch( - Img, TmpCoords)); + sycl::detail::convertVec4ToRequestedType(vec4Result)); } template @@ -147,9 +261,17 @@ static RetType __invoke__ImageArrayFetch(ImageT Img, CoordT Coords, using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4. + using RawElemType = sycl::detail::get_image_element_type_t; + using ElemType = sycl::detail::spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + + Vec4Type vec4Result = + __spirv_ImageArrayFetch( + Img, TmpCoords, ArrayLayer); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageArrayFetch( - Img, TmpCoords, ArrayLayer)); + sycl::detail::convertVec4ToRequestedType(vec4Result)); } template @@ -160,9 +282,17 @@ static RetType __invoke__SampledImageArrayFetch(ImageT Img, CoordT Coords, using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4. + using RawElemType = sycl::detail::get_image_element_type_t; + using ElemType = sycl::detail::spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + + Vec4Type vec4Result = + __spirv_SampledImageArrayFetch( + Img, TmpCoords, ArrayLayer); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_SampledImageArrayFetch( - Img, TmpCoords, ArrayLayer)); + sycl::detail::convertVec4ToRequestedType(vec4Result)); } template @@ -173,9 +303,17 @@ static RetType __invoke__ImageArrayRead(ImageT Img, CoordT Coords, using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) vec4. + using RawElemType = sycl::detail::get_image_element_type_t; + using ElemType = sycl::detail::spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + + Vec4Type vec4Result = + __spirv_ImageArrayRead( + Img, TmpCoords, ArrayLayer); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageArrayRead( - Img, TmpCoords, ArrayLayer)); + sycl::detail::convertVec4ToRequestedType(vec4Result)); } template @@ -186,8 +324,11 @@ static void __invoke__ImageArrayWrite(ImageT Img, CoordT Coords, int ArrayLayer, auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); auto TmpVal = sycl::detail::convertToOpenCLType(Val); - __spirv_ImageArrayWrite( - Img, TmpCoords, ArrayLayer, TmpVal); + // SPIR-V spec requires OpImageWrite texel to be vec4. + auto vec4Val = sycl::detail::convertRequestedTypeToVec4(TmpVal); + + __spirv_ImageArrayWrite( + Img, TmpCoords, ArrayLayer, vec4Val); } template @@ -197,25 +338,24 @@ static RetType __invoke__ImageReadCubemap(SmpImageT SmpImg, DirVecT DirVec) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpDirVec = sycl::detail::convertToOpenCLType(DirVec); + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4. + using RawElemType = sycl::detail::get_image_element_type_t; + using ElemType = sycl::detail::spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + + Vec4Type vec4Result = + __spirv_ImageSampleCubemap( + SmpImg, TmpDirVec); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageSampleCubemap( - SmpImg, TmpDirVec)); + sycl::detail::convertVec4ToRequestedType(vec4Result)); } template static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, float Level) { - // The result type of the SPIR-V instruction OpImageSampleExplicitLod must be - // a vector of four components. Use the type trait to get the appropriate - // temporary vector type based on the original return type. - using NoRefT = std::remove_reference_t; - using RetVecType = sycl::detail::image_sample_explicit_lod_result_t; - static_assert(sizeof(RetVecType) >= sizeof(RetType), - "RetVecType should be at least as big as RetType to hold the " - "result of the SPIR-V call."); - // Convert from sycl types to builtin types to get correct function mangling. - using TempRetT = sycl::detail::ConvertToOpenCLType_t; + using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); enum ImageOperands { Lod = 0x2 }; @@ -226,14 +366,17 @@ static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, // Sampled Image must be an object whose type is OpTypeSampledImage // Image Operands encodes what operands follow. Either Lod // or Grad image operands must be present - auto ResultVec = sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageSampleExplicitLod( - SmpImg, TmpCoords, ImageOperands::Lod, Level)); - - // Copy the result back to the original return type the user expects. - RetType Result; - sycl::detail::memcpy_no_adl(&Result, &ResultVec, sizeof(Result)); - return Result; + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4. + using RawElemType = sycl::detail::get_image_element_type_t; + using ElemType = sycl::detail::spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + + Vec4Type vec4Result = + __spirv_ImageSampleExplicitLod( + SmpImg, TmpCoords, ImageOperands::Lod, Level); + + return sycl::detail::convertFromOpenCLTypeFor( + sycl::detail::convertVec4ToRequestedType(vec4Result)); } template @@ -254,9 +397,17 @@ static RetType __invoke__ImageReadGrad(SmpImageT SmpImg, CoordT Coords, // Sampled Image must be an object whose type is OpTypeSampledImage // Image Operands encodes what operands follow. Either Lod // or Grad image operands must be present + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4. + using RawElemType = sycl::detail::get_image_element_type_t; + using ElemType = sycl::detail::spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + + Vec4Type vec4Result = + __spirv_ImageSampleExplicitLod( + SmpImg, TmpCoords, ImageOperands::Grad, TmpGraddX, TmpGraddY); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageSampleExplicitLod( - SmpImg, TmpCoords, ImageOperands::Grad, TmpGraddX, TmpGraddY)); + sycl::detail::convertVec4ToRequestedType(vec4Result)); } template @@ -281,10 +432,18 @@ static RetType __invoke__ImageReadSampler(ImageT Img, CoordT Coords, enum ImageOperands { Lod = 0x2 }; // Lod value is zero as mipmap is not supported. - return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageSampleExplicitLod( + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4. + using RawElemType = sycl::detail::get_image_element_type_t; + using ElemType = sycl::detail::spirv_image_widened_elem_type_t; + using Vec4Type = ElemType __attribute__((ext_vector_type(4))); + + Vec4Type vec4Result = + __spirv_ImageSampleExplicitLod( __spirv_SampledImage(Img, Smpl), TmpCoords, - ImageOperands::Lod, 0.0f)); + ImageOperands::Lod, 0.0f); + + return sycl::detail::convertFromOpenCLTypeFor( + sycl::detail::convertVec4ToRequestedType(vec4Result)); } namespace sycl { diff --git a/sycl/test/check_device_code/image_spirv_types.cpp b/sycl/test/check_device_code/image_spirv_types.cpp new file mode 100644 index 0000000000000..d1922b7e2c1fb --- /dev/null +++ b/sycl/test/check_device_code/image_spirv_types.cpp @@ -0,0 +1,281 @@ +// Check that image operations use supported types by OpenCL SPIR-V env spec. +// +// The OpenCL SPIR-V environment spec requires that OpImageRead, OpImageWrite, +// OpImageFetch, and OpImageSampleExplicitLod use vec4 operands with 32-bit +// component types (or 16-bit for half). This test verifies that channel sizes +// and narrow integer types (int8_t, uint8_t, int16_t, uint16_t) are properly +// widened to vec4 32-bit in the generated SPIR-V calls. + +// RUN: %clangxx -O2 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck %s + +#include + +using OCLImageTyRead = + typename sycl::detail::opencl_image_type<2, sycl::access::mode::read, + sycl::access::target::image>::type; + +using OCLImageTyWrite = + typename sycl::detail::opencl_image_type<2, sycl::access::mode::write, + sycl::access::target::image>::type; + +using OCLSampledImageTy = + typename sycl::detail::sampled_opencl_image_type::type; + +// Test int8_t read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL int8_t test_int8_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test uint8_t read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint8_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL uint8_t test_uint8_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test int16_t read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int16_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL int16_t test_int16_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test uint16_t read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL uint16_t test_uint16_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test sycl::vec read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_vec4_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} <4 x i8> @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL sycl::vec test_int8_vec4_read(OCLImageTyRead img) { + return __invoke__ImageRead>(img, sycl::int2(0, 0)); +} + +// Test float - should NOT widen (already 32-bit) +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_float_read +// CHECK: call {{.*}} <4 x float> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} float @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL float test_float_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test int32_t - should NOT widen (already 32-bit) +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int32_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} i32 @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL int32_t test_int32_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test sycl::half - should NOT widen (16-bit is allowed for half) +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_half_read +// CHECK: call {{.*}} <4 x half> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} half @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL sycl::half test_half_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test int8_t write - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_write +// CHECK: call spir_func void @{{.*}}__spirv_ImageWrite +// CHECK-SAME: <4 x i32> +// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} i8 +SYCL_EXTERNAL void test_int8_write(OCLImageTyWrite img, int8_t val) { + __invoke__ImageWrite(img, sycl::int2(0, 0), val); +} + +// Test uint16_t write - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_write +// CHECK: call spir_func void @{{.*}}__spirv_ImageWrite +// CHECK-SAME: <4 x i32> +// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} i16 +SYCL_EXTERNAL void test_uint16_write(OCLImageTyWrite img, uint16_t val) { + __invoke__ImageWrite(img, sycl::int2(0, 0), val); +} + +// Test sycl::vec write - should widen to <4 x i32> with zero-fill +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint8_vec2_write +// CHECK: call spir_func void @{{.*}}__spirv_ImageWrite +// CHECK-SAME: <4 x i32> +// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} <2 x i8> +SYCL_EXTERNAL void test_uint8_vec2_write(OCLImageTyWrite img, sycl::vec val) { + __invoke__ImageWrite(img, sycl::int2(0, 0), val); +} + +// Test int8_t fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageFetch +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageFetch +SYCL_EXTERNAL int8_t test_int8_fetch(OCLImageTyRead img) { + return __invoke__ImageFetch(img, sycl::int2(0, 0)); +} + +// Test uint16_t fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageFetch +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageFetch +SYCL_EXTERNAL uint16_t test_uint16_fetch(OCLImageTyRead img) { + return __invoke__ImageFetch(img, sycl::int2(0, 0)); +} + +// Test int8_t sampled fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_sampled_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageFetch +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_SampledImageFetch +SYCL_EXTERNAL int8_t test_int8_sampled_fetch(OCLSampledImageTy img) { + return __invoke__SampledImageFetch(img, sycl::int2(0, 0)); +} + +// Test uint16_t sampled fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_sampled_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageFetch +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_SampledImageFetch +SYCL_EXTERNAL uint16_t test_uint16_sampled_fetch(OCLSampledImageTy img) { + return __invoke__SampledImageFetch(img, sycl::int2(0, 0)); +} + +// Test int8_t array read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_array_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayRead +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageArrayRead +SYCL_EXTERNAL int8_t test_int8_array_read(OCLImageTyRead img) { + return __invoke__ImageArrayRead(img, sycl::int2(0, 0), 0); +} + +// Test uint16_t array read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_array_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayRead +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageArrayRead +SYCL_EXTERNAL uint16_t test_uint16_array_read(OCLImageTyRead img) { + return __invoke__ImageArrayRead(img, sycl::int2(0, 0), 0); +} + +// Test int8_t array write - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_array_write +// CHECK: call spir_func void @{{.*}}__spirv_ImageArrayWrite +// CHECK-SAME: <4 x i32> +// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageArrayWrite{{.*}} i8 +SYCL_EXTERNAL void test_int8_array_write(OCLImageTyWrite img, int8_t val) { + __invoke__ImageArrayWrite(img, sycl::int2(0, 0), 0, val); +} + +// Test uint16_t array write - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_array_write +// CHECK: call spir_func void @{{.*}}__spirv_ImageArrayWrite +// CHECK-SAME: <4 x i32> +// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageArrayWrite{{.*}} i16 +SYCL_EXTERNAL void test_uint16_array_write(OCLImageTyWrite img, uint16_t val) { + __invoke__ImageArrayWrite(img, sycl::int2(0, 0), 0, val); +} + +// Test int8_t array fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_array_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayFetch +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageArrayFetch +SYCL_EXTERNAL int8_t test_int8_array_fetch(OCLImageTyRead img) { + return __invoke__ImageArrayFetch(img, sycl::int2(0, 0), 0); +} + +// Test uint16_t array fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_array_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayFetch +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageArrayFetch +SYCL_EXTERNAL uint16_t test_uint16_array_fetch(OCLImageTyRead img) { + return __invoke__ImageArrayFetch(img, sycl::int2(0, 0), 0); +} + +// Test int8_t sampled array fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_sampled_array_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageArrayFetch +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_SampledImageArrayFetch +SYCL_EXTERNAL int8_t test_int8_sampled_array_fetch(OCLSampledImageTy img) { + return __invoke__SampledImageArrayFetch(img, sycl::int2(0, 0), 0); +} + +// Test uint16_t sampled array fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_sampled_array_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageArrayFetch +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_SampledImageArrayFetch +SYCL_EXTERNAL uint16_t test_uint16_sampled_array_fetch(OCLSampledImageTy img) { + return __invoke__SampledImageArrayFetch(img, sycl::int2(0, 0), 0); +} + +// Test int8_t cubemap read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_cubemap +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleCubemap +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleCubemap +SYCL_EXTERNAL int8_t test_int8_read_cubemap(OCLSampledImageTy img) { + return __invoke__ImageReadCubemap(img, sycl::float3(0.0f, 0.0f, 0.0f)); +} + +// Test uint16_t cubemap read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_cubemap +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleCubemap +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleCubemap +SYCL_EXTERNAL uint16_t test_uint16_read_cubemap(OCLSampledImageTy img) { + return __invoke__ImageReadCubemap(img, sycl::float3(0.0f, 0.0f, 0.0f)); +} + +// Test int8_t lod read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_lod +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL int8_t test_int8_read_lod(OCLSampledImageTy img) { + return __invoke__ImageReadLod(img, sycl::float2(0.0f, 0.0f), 0.0f); +} + +// Test uint16_t lod read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_lod +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL uint16_t test_uint16_read_lod(OCLSampledImageTy img) { + return __invoke__ImageReadLod(img, sycl::float2(0.0f, 0.0f), 0.0f); +} + +// Test int8_t grad read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_grad +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL int8_t test_int8_read_grad(OCLSampledImageTy img) { + return __invoke__ImageReadGrad(img, sycl::float2(0.0f, 0.0f), + sycl::float2(0.0f, 0.0f), + sycl::float2(0.0f, 0.0f)); +} + +// Test uint16_t grad read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_grad +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL uint16_t test_uint16_read_grad(OCLSampledImageTy img) { + return __invoke__ImageReadGrad(img, sycl::float2(0.0f, 0.0f), + sycl::float2(0.0f, 0.0f), + sycl::float2(0.0f, 0.0f)); +} + +// Test int8_t sampler read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_sampler +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL int8_t test_int8_read_sampler(OCLImageTyRead img, + const __ocl_sampler_t &smpl) { + return __invoke__ImageReadSampler(img, sycl::float2(0.0f, 0.0f), smpl); +} + +// Test uint16_t sampler read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_sampler +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL uint16_t test_uint16_read_sampler(OCLImageTyRead img, + const __ocl_sampler_t &smpl) { + return __invoke__ImageReadSampler(img, sycl::float2(0.0f, 0.0f), smpl); +} From c01ad0f5f9054da3038c593b3b5ac59f0f78d15c Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Tue, 5 May 2026 16:08:08 +0100 Subject: [PATCH 2/3] Remove static_asserts --- sycl/include/sycl/detail/image_ocl_types.hpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index dcdccb943943a..ac4af0e7ca7f3 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -74,16 +74,6 @@ template <> struct spirv_image_widened_elem_type { using type = uint32 template using spirv_image_widened_elem_type_t = typename spirv_image_widened_elem_type::type; -// Compile-time verification of type widening rules. -static_assert(std::is_same_v, int32_t>); -static_assert(std::is_same_v, uint32_t>); -static_assert(std::is_same_v, int32_t>); -static_assert(std::is_same_v, uint32_t>); -static_assert(std::is_same_v, int32_t>); -static_assert(std::is_same_v, uint32_t>); -static_assert(std::is_same_v, float>); -static_assert(std::is_same_v, _Float16>); - // Helper function to convert vec4 result to requested OpenCL type. // Handles scalar, vec2, vec3, and vec4 return types. template From 229491e26d0a7bb819c69aea67433cdcd5dc849c Mon Sep 17 00:00:00 2001 From: Sean Stirling Date: Thu, 28 May 2026 16:11:29 +0100 Subject: [PATCH 3/3] Guard NVPTX and AMDGCN --- sycl/include/sycl/detail/image_ocl_types.hpp | 265 ++++++++++-------- .../check_device_code/image_spirv_types.cpp | 23 +- 2 files changed, 161 insertions(+), 127 deletions(-) diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index ac4af0e7ca7f3..7d435154ed3bf 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -42,7 +42,9 @@ template struct sampled_opencl_image_type; // Helper to extract element type from scalar or OpenCL vector type // (e.g., float or float __attribute__((ext_vector_type(4)))) -template struct get_image_element_type { using type = T; }; +template struct get_image_element_type { + using type = T; +}; template struct get_image_element_type { using type = T; @@ -66,59 +68,108 @@ inline constexpr int get_num_elements_v = get_num_elements::value; // component types, with the sole exception of half (_Float16) which may use // 16-bit components. Channel sizes and narrow integer types (8-bit and 16-bit) // must be widened to their vec4 32-bit equivalents. -template struct spirv_image_widened_elem_type { using type = T; }; -template <> struct spirv_image_widened_elem_type { using type = int32_t; }; -template <> struct spirv_image_widened_elem_type { using type = uint32_t; }; -template <> struct spirv_image_widened_elem_type { using type = int32_t; }; -template <> struct spirv_image_widened_elem_type { using type = uint32_t; }; +// +// On NVPTX and AMDGCN, the __spirv_Image* builtins are implemented directly in +// libclc with byte strides taken into account. Widening to 32-bit or vec4 +// selects the wrong libclc overload. On those targets spirv_image_call_type_t +// leaves the type unchanged. +#if !defined(__NVPTX__) && !defined(__AMDGCN__) +template struct spirv_image_widened_elem_type { + using type = T; +}; +template <> struct spirv_image_widened_elem_type { + using type = int32_t; +}; +template <> struct spirv_image_widened_elem_type { + using type = uint32_t; +}; +template <> struct spirv_image_widened_elem_type { + using type = int32_t; +}; +template <> struct spirv_image_widened_elem_type { + using type = uint32_t; +}; +template +using spirv_image_widened_elem_type_t = + typename spirv_image_widened_elem_type::type; +#endif + +// spirv_image_call_type_t: the type to use for __spirv_Image* read/write +// calls. On SPIR-V/OpenCL: widened elem type forced to vec4. On NVPTX/AMDGCN: T +// unchanged (libclc has native-typed overloads). +#if defined(__NVPTX__) || defined(__AMDGCN__) +template struct spirv_image_call_type { + using type = T; +}; +#else +template struct spirv_image_call_type { + using ElemT = spirv_image_widened_elem_type_t>; + using type = ElemT __attribute__((ext_vector_type(4))); +}; +#endif template -using spirv_image_widened_elem_type_t = typename spirv_image_widened_elem_type::type; +using spirv_image_call_type_t = typename spirv_image_call_type::type; + +// Helper function to convert the result of a __spirv_Image* call back to the +// requested OpenCL type. +// On SPIR-V/OpenCL the call returns a widened vec4. +// On NVPTX/AMDGCN CallT == RequestedType so this reduces to identity casts. +template +static inline constexpr RequestedType +convertImageCallResultToOpenCLType(CallT callResult) { + if constexpr (std::is_same_v) + return callResult; -// Helper function to convert vec4 result to requested OpenCL type. -// Handles scalar, vec2, vec3, and vec4 return types. -template -static inline constexpr RequestedType convertVec4ToRequestedType(Vec4Type vec4Result) { using ElemType = get_image_element_type_t; constexpr int NumElements = get_num_elements_v; - // Extract components based on RequestedType if constexpr (NumElements == 1) { - return static_cast(vec4Result[0]); + if constexpr (get_num_elements_v == 1) + return static_cast(callResult); + else + return static_cast(callResult[0]); } else if constexpr (NumElements == 2) { using Vec2Type = ElemType __attribute__((ext_vector_type(2))); Vec2Type result; - result[0] = static_cast(vec4Result[0]); - result[1] = static_cast(vec4Result[1]); + result[0] = static_cast(callResult[0]); + result[1] = static_cast(callResult[1]); return result; } else if constexpr (NumElements == 3) { using Vec3Type = ElemType __attribute__((ext_vector_type(3))); Vec3Type result; - result[0] = static_cast(vec4Result[0]); - result[1] = static_cast(vec4Result[1]); - result[2] = static_cast(vec4Result[2]); + result[0] = static_cast(callResult[0]); + result[1] = static_cast(callResult[1]); + result[2] = static_cast(callResult[2]); return result; } else { static_assert(NumElements == 4, "Vector size must be 1, 2, 3, or 4"); using Vec4NarrowType = ElemType __attribute__((ext_vector_type(4))); Vec4NarrowType result; - result[0] = static_cast(vec4Result[0]); - result[1] = static_cast(vec4Result[1]); - result[2] = static_cast(vec4Result[2]); - result[3] = static_cast(vec4Result[3]); + result[0] = static_cast(callResult[0]); + result[1] = static_cast(callResult[1]); + result[2] = static_cast(callResult[2]); + result[3] = static_cast(callResult[3]); return result; } } -// Helper function to convert scalar or OpenCL vector value to vec4 for OpImageWrite. +// Helper function to convert scalar or OpenCL vector value to the appropriate +// call type for __spirv_Image* write. +// On SPIR-V/OpenCL this widens to vec4. +// on NVPTX/AMDGCN CallT == SourceType so this reduces to identity casts. template -static inline constexpr auto convertRequestedTypeToVec4(SourceType val) { - using RawElemType = get_image_element_type_t; - using ElemType = spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); +static inline constexpr auto convertOpenCLTypeToImageCallType(SourceType val) { + using CallT = spirv_image_call_type_t; + if constexpr (std::is_same_v) + return val; + + using ElemType = get_image_element_type_t; constexpr int NumElements = get_num_elements_v; - Vec4Type result{}; - if constexpr (NumElements == 1) { + CallT result{}; + if constexpr (get_num_elements_v == 1) { + result = static_cast(val); + } else if constexpr (NumElements == 1) { result[0] = static_cast(val); } else if constexpr (NumElements == 2) { result[0] = static_cast(val[0]); @@ -162,10 +213,10 @@ static void __invoke__ImageWrite(ImageT Img, CoordT Coords, ValT Val) { auto TmpVal = sycl::detail::convertToOpenCLType(Val); // SPIR-V spec requires OpImageWrite texel to be vec4. - auto vec4Val = sycl::detail::convertRequestedTypeToVec4(TmpVal); + auto callVal = sycl::detail::convertOpenCLTypeToImageCallType(TmpVal); - __spirv_ImageWrite( - Img, TmpCoords, vec4Val); + __spirv_ImageWrite( + Img, TmpCoords, callVal); } template @@ -175,16 +226,14 @@ static RetType __invoke__ImageRead(ImageT Img, CoordT Coords) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); - // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) vec4. - using RawElemType = sycl::detail::get_image_element_type_t; - using ElemType = sycl::detail::spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); - - Vec4Type vec4Result = - __spirv_ImageRead(Img, TmpCoords); + // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageRead(Img, TmpCoords); return sycl::detail::convertFromOpenCLTypeFor( - sycl::detail::convertVec4ToRequestedType(vec4Result)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -194,17 +243,14 @@ static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); - // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4. - using RawElemType = sycl::detail::get_image_element_type_t; - using ElemType = sycl::detail::spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); - - Vec4Type vec4Result = - __spirv_ImageFetch(Img, - TmpCoords); + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageFetch(Img, TmpCoords); return sycl::detail::convertFromOpenCLTypeFor( - sycl::detail::convertVec4ToRequestedType(vec4Result)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -214,17 +260,15 @@ static RetType __invoke__SampledImageFetch(ImageT Img, CoordT Coords) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); - // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4. - using RawElemType = sycl::detail::get_image_element_type_t; - using ElemType = sycl::detail::spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); - - Vec4Type vec4Result = - __spirv_SampledImageFetch( - Img, TmpCoords); + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_SampledImageFetch(Img, + TmpCoords); return sycl::detail::convertFromOpenCLTypeFor( - sycl::detail::convertVec4ToRequestedType(vec4Result)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -251,17 +295,15 @@ static RetType __invoke__ImageArrayFetch(ImageT Img, CoordT Coords, using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); - // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4. - using RawElemType = sycl::detail::get_image_element_type_t; - using ElemType = sycl::detail::spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); - - Vec4Type vec4Result = - __spirv_ImageArrayFetch( + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageArrayFetch( Img, TmpCoords, ArrayLayer); return sycl::detail::convertFromOpenCLTypeFor( - sycl::detail::convertVec4ToRequestedType(vec4Result)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -272,17 +314,15 @@ static RetType __invoke__SampledImageArrayFetch(ImageT Img, CoordT Coords, using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); - // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4. - using RawElemType = sycl::detail::get_image_element_type_t; - using ElemType = sycl::detail::spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); - - Vec4Type vec4Result = - __spirv_SampledImageArrayFetch( + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_SampledImageArrayFetch( Img, TmpCoords, ArrayLayer); return sycl::detail::convertFromOpenCLTypeFor( - sycl::detail::convertVec4ToRequestedType(vec4Result)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -293,17 +333,14 @@ static RetType __invoke__ImageArrayRead(ImageT Img, CoordT Coords, using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); - // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) vec4. - using RawElemType = sycl::detail::get_image_element_type_t; - using ElemType = sycl::detail::spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); - - Vec4Type vec4Result = - __spirv_ImageArrayRead( - Img, TmpCoords, ArrayLayer); + // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = __spirv_ImageArrayRead( + Img, TmpCoords, ArrayLayer); return sycl::detail::convertFromOpenCLTypeFor( - sycl::detail::convertVec4ToRequestedType(vec4Result)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -315,10 +352,10 @@ static void __invoke__ImageArrayWrite(ImageT Img, CoordT Coords, int ArrayLayer, auto TmpVal = sycl::detail::convertToOpenCLType(Val); // SPIR-V spec requires OpImageWrite texel to be vec4. - auto vec4Val = sycl::detail::convertRequestedTypeToVec4(TmpVal); + auto callVal = sycl::detail::convertOpenCLTypeToImageCallType(TmpVal); - __spirv_ImageArrayWrite( - Img, TmpCoords, ArrayLayer, vec4Val); + __spirv_ImageArrayWrite( + Img, TmpCoords, ArrayLayer, callVal); } template @@ -328,17 +365,15 @@ static RetType __invoke__ImageReadCubemap(SmpImageT SmpImg, DirVecT DirVec) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpDirVec = sycl::detail::convertToOpenCLType(DirVec); - // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4. - using RawElemType = sycl::detail::get_image_element_type_t; - using ElemType = sycl::detail::spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); - - Vec4Type vec4Result = - __spirv_ImageSampleCubemap( + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit + // for half) vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageSampleCubemap( SmpImg, TmpDirVec); return sycl::detail::convertFromOpenCLTypeFor( - sycl::detail::convertVec4ToRequestedType(vec4Result)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -356,17 +391,15 @@ static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, // Sampled Image must be an object whose type is OpTypeSampledImage // Image Operands encodes what operands follow. Either Lod // or Grad image operands must be present - // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4. - using RawElemType = sycl::detail::get_image_element_type_t; - using ElemType = sycl::detail::spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); - - Vec4Type vec4Result = - __spirv_ImageSampleExplicitLod( + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit + // for half) vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageSampleExplicitLod( SmpImg, TmpCoords, ImageOperands::Lod, Level); return sycl::detail::convertFromOpenCLTypeFor( - sycl::detail::convertVec4ToRequestedType(vec4Result)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -387,17 +420,15 @@ static RetType __invoke__ImageReadGrad(SmpImageT SmpImg, CoordT Coords, // Sampled Image must be an object whose type is OpTypeSampledImage // Image Operands encodes what operands follow. Either Lod // or Grad image operands must be present - // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4. - using RawElemType = sycl::detail::get_image_element_type_t; - using ElemType = sycl::detail::spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); - - Vec4Type vec4Result = - __spirv_ImageSampleExplicitLod( + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit + // for half) vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageSampleExplicitLod( SmpImg, TmpCoords, ImageOperands::Grad, TmpGraddX, TmpGraddY); return sycl::detail::convertFromOpenCLTypeFor( - sycl::detail::convertVec4ToRequestedType(vec4Result)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -422,18 +453,16 @@ static RetType __invoke__ImageReadSampler(ImageT Img, CoordT Coords, enum ImageOperands { Lod = 0x2 }; // Lod value is zero as mipmap is not supported. - // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4. - using RawElemType = sycl::detail::get_image_element_type_t; - using ElemType = sycl::detail::spirv_image_widened_elem_type_t; - using Vec4Type = ElemType __attribute__((ext_vector_type(4))); - - Vec4Type vec4Result = - __spirv_ImageSampleExplicitLod( + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit + // for half) vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageSampleExplicitLod( __spirv_SampledImage(Img, Smpl), TmpCoords, ImageOperands::Lod, 0.0f); return sycl::detail::convertFromOpenCLTypeFor( - sycl::detail::convertVec4ToRequestedType(vec4Result)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } namespace sycl { diff --git a/sycl/test/check_device_code/image_spirv_types.cpp b/sycl/test/check_device_code/image_spirv_types.cpp index d1922b7e2c1fb..d9b854728b8cc 100644 --- a/sycl/test/check_device_code/image_spirv_types.cpp +++ b/sycl/test/check_device_code/image_spirv_types.cpp @@ -108,7 +108,8 @@ SYCL_EXTERNAL void test_uint16_write(OCLImageTyWrite img, uint16_t val) { // CHECK: call spir_func void @{{.*}}__spirv_ImageWrite // CHECK-SAME: <4 x i32> // CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} <2 x i8> -SYCL_EXTERNAL void test_uint8_vec2_write(OCLImageTyWrite img, sycl::vec val) { +SYCL_EXTERNAL void test_uint8_vec2_write(OCLImageTyWrite img, + sycl::vec val) { __invoke__ImageWrite(img, sycl::int2(0, 0), val); } @@ -215,7 +216,8 @@ SYCL_EXTERNAL uint16_t test_uint16_sampled_array_fetch(OCLSampledImageTy img) { // CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleCubemap // CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleCubemap SYCL_EXTERNAL int8_t test_int8_read_cubemap(OCLSampledImageTy img) { - return __invoke__ImageReadCubemap(img, sycl::float3(0.0f, 0.0f, 0.0f)); + return __invoke__ImageReadCubemap(img, + sycl::float3(0.0f, 0.0f, 0.0f)); } // Test uint16_t cubemap read - should widen to <4 x i32> @@ -223,7 +225,8 @@ SYCL_EXTERNAL int8_t test_int8_read_cubemap(OCLSampledImageTy img) { // CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleCubemap // CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleCubemap SYCL_EXTERNAL uint16_t test_uint16_read_cubemap(OCLSampledImageTy img) { - return __invoke__ImageReadCubemap(img, sycl::float3(0.0f, 0.0f, 0.0f)); + return __invoke__ImageReadCubemap(img, + sycl::float3(0.0f, 0.0f, 0.0f)); } // Test int8_t lod read - should widen to <4 x i32> @@ -258,8 +261,8 @@ SYCL_EXTERNAL int8_t test_int8_read_grad(OCLSampledImageTy img) { // CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod SYCL_EXTERNAL uint16_t test_uint16_read_grad(OCLSampledImageTy img) { return __invoke__ImageReadGrad(img, sycl::float2(0.0f, 0.0f), - sycl::float2(0.0f, 0.0f), - sycl::float2(0.0f, 0.0f)); + sycl::float2(0.0f, 0.0f), + sycl::float2(0.0f, 0.0f)); } // Test int8_t sampler read - should widen to <4 x i32> @@ -267,8 +270,9 @@ SYCL_EXTERNAL uint16_t test_uint16_read_grad(OCLSampledImageTy img) { // CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod // CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod SYCL_EXTERNAL int8_t test_int8_read_sampler(OCLImageTyRead img, - const __ocl_sampler_t &smpl) { - return __invoke__ImageReadSampler(img, sycl::float2(0.0f, 0.0f), smpl); + const __ocl_sampler_t &smpl) { + return __invoke__ImageReadSampler(img, sycl::float2(0.0f, 0.0f), + smpl); } // Test uint16_t sampler read - should widen to <4 x i32> @@ -276,6 +280,7 @@ SYCL_EXTERNAL int8_t test_int8_read_sampler(OCLImageTyRead img, // CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod // CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod SYCL_EXTERNAL uint16_t test_uint16_read_sampler(OCLImageTyRead img, - const __ocl_sampler_t &smpl) { - return __invoke__ImageReadSampler(img, sycl::float2(0.0f, 0.0f), smpl); + const __ocl_sampler_t &smpl) { + return __invoke__ImageReadSampler(img, sycl::float2(0.0f, 0.0f), + smpl); }