diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index 1880c7e520c54..7d435154ed3bf 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -30,7 +30,6 @@ #include #include -#include #include @@ -41,24 +40,153 @@ namespace detail { // Type trait to get the associated sampled image type for a given image type. template struct sampled_opencl_image_type; -// The SPIR-V spec requires the result of OpImageSampleExplicitLod to be a -// vector type of four components. To satisfy this requirement, we need to use -// a temporary vector type to hold the result of the SPIR-V call, and then -// copy the result back to the original return type. The following type trait is -// used to get the temporary vector type based on the original return type. - -template struct image_sample_explicit_lod_result { - using type = sycl::vec; +// Helper to extract element type from scalar or OpenCL vector type +// (e.g., float or float __attribute__((ext_vector_type(4)))) +template struct get_image_element_type { + using type = T; +}; +template +struct get_image_element_type { + using type = T; }; +template +using get_image_element_type_t = typename get_image_element_type::type; -template -struct image_sample_explicit_lod_result> { - using type = sycl::vec; +// Helper to get number of elements (1 for scalar, N for vector) +template struct get_num_elements { + static constexpr int value = 1; +}; +template +struct get_num_elements { + static constexpr int value = N; +}; +template +inline constexpr int get_num_elements_v = get_num_elements::value; + +// The OpenCL SPIR-V environment spec requires that OpImageRead, OpImageWrite, +// OpImageFetch, and OpImageSampleExplicitLod use vec4 operands with 32-bit +// component types, with the sole exception of half (_Float16) which may use +// 16-bit components. Channel sizes and narrow integer types (8-bit and 16-bit) +// must be widened to their vec4 32-bit equivalents. +// +// On NVPTX and AMDGCN, the __spirv_Image* builtins are implemented directly in +// libclc with byte strides taken into account. Widening to 32-bit or vec4 +// selects the wrong libclc overload. On those targets spirv_image_call_type_t +// leaves the type unchanged. +#if !defined(__NVPTX__) && !defined(__AMDGCN__) +template struct spirv_image_widened_elem_type { + using type = T; +}; +template <> struct spirv_image_widened_elem_type { + using type = int32_t; +}; +template <> struct spirv_image_widened_elem_type { + using type = uint32_t; +}; +template <> struct spirv_image_widened_elem_type { + using type = int32_t; }; +template <> struct spirv_image_widened_elem_type { + using type = uint32_t; +}; +template +using spirv_image_widened_elem_type_t = + typename spirv_image_widened_elem_type::type; +#endif + +// spirv_image_call_type_t: the type to use for __spirv_Image* read/write +// calls. On SPIR-V/OpenCL: widened elem type forced to vec4. On NVPTX/AMDGCN: T +// unchanged (libclc has native-typed overloads). +#if defined(__NVPTX__) || defined(__AMDGCN__) +template struct spirv_image_call_type { + using type = T; +}; +#else +template struct spirv_image_call_type { + using ElemT = spirv_image_widened_elem_type_t>; + using type = ElemT __attribute__((ext_vector_type(4))); +}; +#endif +template +using spirv_image_call_type_t = typename spirv_image_call_type::type; + +// Helper function to convert the result of a __spirv_Image* call back to the +// requested OpenCL type. +// On SPIR-V/OpenCL the call returns a widened vec4. +// On NVPTX/AMDGCN CallT == RequestedType so this reduces to identity casts. +template +static inline constexpr RequestedType +convertImageCallResultToOpenCLType(CallT callResult) { + if constexpr (std::is_same_v) + return callResult; + + using ElemType = get_image_element_type_t; + constexpr int NumElements = get_num_elements_v; + + if constexpr (NumElements == 1) { + if constexpr (get_num_elements_v == 1) + return static_cast(callResult); + else + return static_cast(callResult[0]); + } else if constexpr (NumElements == 2) { + using Vec2Type = ElemType __attribute__((ext_vector_type(2))); + Vec2Type result; + result[0] = static_cast(callResult[0]); + result[1] = static_cast(callResult[1]); + return result; + } else if constexpr (NumElements == 3) { + using Vec3Type = ElemType __attribute__((ext_vector_type(3))); + Vec3Type result; + result[0] = static_cast(callResult[0]); + result[1] = static_cast(callResult[1]); + result[2] = static_cast(callResult[2]); + return result; + } else { + static_assert(NumElements == 4, "Vector size must be 1, 2, 3, or 4"); + using Vec4NarrowType = ElemType __attribute__((ext_vector_type(4))); + Vec4NarrowType result; + result[0] = static_cast(callResult[0]); + result[1] = static_cast(callResult[1]); + result[2] = static_cast(callResult[2]); + result[3] = static_cast(callResult[3]); + return result; + } +} -template -using image_sample_explicit_lod_result_t = - typename image_sample_explicit_lod_result::type; +// Helper function to convert scalar or OpenCL vector value to the appropriate +// call type for __spirv_Image* write. +// On SPIR-V/OpenCL this widens to vec4. +// on NVPTX/AMDGCN CallT == SourceType so this reduces to identity casts. +template +static inline constexpr auto convertOpenCLTypeToImageCallType(SourceType val) { + using CallT = spirv_image_call_type_t; + if constexpr (std::is_same_v) + return val; + + using ElemType = get_image_element_type_t; + constexpr int NumElements = get_num_elements_v; + + CallT result{}; + if constexpr (get_num_elements_v == 1) { + result = static_cast(val); + } else if constexpr (NumElements == 1) { + result[0] = static_cast(val); + } else if constexpr (NumElements == 2) { + result[0] = static_cast(val[0]); + result[1] = static_cast(val[1]); + } else if constexpr (NumElements == 3) { + result[0] = static_cast(val[0]); + result[1] = static_cast(val[1]); + result[2] = static_cast(val[2]); + } else { + static_assert(NumElements == 4, "Vector size must be 1, 2, 3, or 4"); + result[0] = static_cast(val[0]); + result[1] = static_cast(val[1]); + result[2] = static_cast(val[2]); + result[3] = static_cast(val[3]); + } + return result; +} } // namespace detail } // namespace _V1 @@ -84,8 +212,11 @@ static void __invoke__ImageWrite(ImageT Img, CoordT Coords, ValT Val) { auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); auto TmpVal = sycl::detail::convertToOpenCLType(Val); - __spirv_ImageWrite( - Img, TmpCoords, TmpVal); + // SPIR-V spec requires OpImageWrite texel to be vec4. + auto callVal = sycl::detail::convertOpenCLTypeToImageCallType(TmpVal); + + __spirv_ImageWrite( + Img, TmpCoords, callVal); } template @@ -95,8 +226,14 @@ static RetType __invoke__ImageRead(ImageT Img, CoordT Coords) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageRead(Img, TmpCoords); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageRead(Img, TmpCoords)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -106,9 +243,14 @@ static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageFetch(Img, TmpCoords); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageFetch(Img, - TmpCoords)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -118,9 +260,15 @@ static RetType __invoke__SampledImageFetch(ImageT Img, CoordT Coords) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_SampledImageFetch(Img, + TmpCoords); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_SampledImageFetch( - Img, TmpCoords)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -147,9 +295,15 @@ static RetType __invoke__ImageArrayFetch(ImageT Img, CoordT Coords, using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageArrayFetch( + Img, TmpCoords, ArrayLayer); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageArrayFetch( - Img, TmpCoords, ArrayLayer)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -160,9 +314,15 @@ static RetType __invoke__SampledImageArrayFetch(ImageT Img, CoordT Coords, using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_SampledImageArrayFetch( + Img, TmpCoords, ArrayLayer); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_SampledImageArrayFetch( - Img, TmpCoords, ArrayLayer)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -173,9 +333,14 @@ static RetType __invoke__ImageArrayRead(ImageT Img, CoordT Coords, using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); + // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) + // vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = __spirv_ImageArrayRead( + Img, TmpCoords, ArrayLayer); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageArrayRead( - Img, TmpCoords, ArrayLayer)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -186,8 +351,11 @@ static void __invoke__ImageArrayWrite(ImageT Img, CoordT Coords, int ArrayLayer, auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); auto TmpVal = sycl::detail::convertToOpenCLType(Val); - __spirv_ImageArrayWrite( - Img, TmpCoords, ArrayLayer, TmpVal); + // SPIR-V spec requires OpImageWrite texel to be vec4. + auto callVal = sycl::detail::convertOpenCLTypeToImageCallType(TmpVal); + + __spirv_ImageArrayWrite( + Img, TmpCoords, ArrayLayer, callVal); } template @@ -197,25 +365,22 @@ static RetType __invoke__ImageReadCubemap(SmpImageT SmpImg, DirVecT DirVec) { using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpDirVec = sycl::detail::convertToOpenCLType(DirVec); + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit + // for half) vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageSampleCubemap( + SmpImg, TmpDirVec); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageSampleCubemap( - SmpImg, TmpDirVec)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, float Level) { - // The result type of the SPIR-V instruction OpImageSampleExplicitLod must be - // a vector of four components. Use the type trait to get the appropriate - // temporary vector type based on the original return type. - using NoRefT = std::remove_reference_t; - using RetVecType = sycl::detail::image_sample_explicit_lod_result_t; - static_assert(sizeof(RetVecType) >= sizeof(RetType), - "RetVecType should be at least as big as RetType to hold the " - "result of the SPIR-V call."); - // Convert from sycl types to builtin types to get correct function mangling. - using TempRetT = sycl::detail::ConvertToOpenCLType_t; + using TempRetT = sycl::detail::ConvertToOpenCLType_t; auto TmpCoords = sycl::detail::convertToOpenCLType(Coords); enum ImageOperands { Lod = 0x2 }; @@ -226,14 +391,15 @@ static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords, // Sampled Image must be an object whose type is OpTypeSampledImage // Image Operands encodes what operands follow. Either Lod // or Grad image operands must be present - auto ResultVec = sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageSampleExplicitLod( - SmpImg, TmpCoords, ImageOperands::Lod, Level)); - - // Copy the result back to the original return type the user expects. - RetType Result; - sycl::detail::memcpy_no_adl(&Result, &ResultVec, sizeof(Result)); - return Result; + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit + // for half) vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageSampleExplicitLod( + SmpImg, TmpCoords, ImageOperands::Lod, Level); + + return sycl::detail::convertFromOpenCLTypeFor( + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -254,9 +420,15 @@ static RetType __invoke__ImageReadGrad(SmpImageT SmpImg, CoordT Coords, // Sampled Image must be an object whose type is OpTypeSampledImage // Image Operands encodes what operands follow. Either Lod // or Grad image operands must be present + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit + // for half) vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageSampleExplicitLod( + SmpImg, TmpCoords, ImageOperands::Grad, TmpGraddX, TmpGraddY); + return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageSampleExplicitLod( - SmpImg, TmpCoords, ImageOperands::Grad, TmpGraddX, TmpGraddY)); + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } template @@ -281,10 +453,16 @@ static RetType __invoke__ImageReadSampler(ImageT Img, CoordT Coords, enum ImageOperands { Lod = 0x2 }; // Lod value is zero as mipmap is not supported. - return sycl::detail::convertFromOpenCLTypeFor( - __spirv_ImageSampleExplicitLod( + // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit + // for half) vec4. + using CallT = sycl::detail::spirv_image_call_type_t; + CallT callResult = + __spirv_ImageSampleExplicitLod( __spirv_SampledImage(Img, Smpl), TmpCoords, - ImageOperands::Lod, 0.0f)); + ImageOperands::Lod, 0.0f); + + return sycl::detail::convertFromOpenCLTypeFor( + sycl::detail::convertImageCallResultToOpenCLType(callResult)); } namespace sycl { diff --git a/sycl/test/check_device_code/image_spirv_types.cpp b/sycl/test/check_device_code/image_spirv_types.cpp new file mode 100644 index 0000000000000..d9b854728b8cc --- /dev/null +++ b/sycl/test/check_device_code/image_spirv_types.cpp @@ -0,0 +1,286 @@ +// Check that image operations use supported types by OpenCL SPIR-V env spec. +// +// The OpenCL SPIR-V environment spec requires that OpImageRead, OpImageWrite, +// OpImageFetch, and OpImageSampleExplicitLod use vec4 operands with 32-bit +// component types (or 16-bit for half). This test verifies that channel sizes +// and narrow integer types (int8_t, uint8_t, int16_t, uint16_t) are properly +// widened to vec4 32-bit in the generated SPIR-V calls. + +// RUN: %clangxx -O2 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck %s + +#include + +using OCLImageTyRead = + typename sycl::detail::opencl_image_type<2, sycl::access::mode::read, + sycl::access::target::image>::type; + +using OCLImageTyWrite = + typename sycl::detail::opencl_image_type<2, sycl::access::mode::write, + sycl::access::target::image>::type; + +using OCLSampledImageTy = + typename sycl::detail::sampled_opencl_image_type::type; + +// Test int8_t read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL int8_t test_int8_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test uint8_t read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint8_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL uint8_t test_uint8_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test int16_t read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int16_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL int16_t test_int16_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test uint16_t read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL uint16_t test_uint16_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test sycl::vec read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_vec4_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} <4 x i8> @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL sycl::vec test_int8_vec4_read(OCLImageTyRead img) { + return __invoke__ImageRead>(img, sycl::int2(0, 0)); +} + +// Test float - should NOT widen (already 32-bit) +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_float_read +// CHECK: call {{.*}} <4 x float> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} float @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL float test_float_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test int32_t - should NOT widen (already 32-bit) +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int32_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} i32 @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL int32_t test_int32_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test sycl::half - should NOT widen (16-bit is allowed for half) +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_half_read +// CHECK: call {{.*}} <4 x half> @{{.*}}__spirv_ImageRead +// CHECK-NOT: call {{.*}} half @{{.*}}__spirv_ImageRead +SYCL_EXTERNAL sycl::half test_half_read(OCLImageTyRead img) { + return __invoke__ImageRead(img, sycl::int2(0, 0)); +} + +// Test int8_t write - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_write +// CHECK: call spir_func void @{{.*}}__spirv_ImageWrite +// CHECK-SAME: <4 x i32> +// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} i8 +SYCL_EXTERNAL void test_int8_write(OCLImageTyWrite img, int8_t val) { + __invoke__ImageWrite(img, sycl::int2(0, 0), val); +} + +// Test uint16_t write - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_write +// CHECK: call spir_func void @{{.*}}__spirv_ImageWrite +// CHECK-SAME: <4 x i32> +// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} i16 +SYCL_EXTERNAL void test_uint16_write(OCLImageTyWrite img, uint16_t val) { + __invoke__ImageWrite(img, sycl::int2(0, 0), val); +} + +// Test sycl::vec write - should widen to <4 x i32> with zero-fill +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint8_vec2_write +// CHECK: call spir_func void @{{.*}}__spirv_ImageWrite +// CHECK-SAME: <4 x i32> +// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} <2 x i8> +SYCL_EXTERNAL void test_uint8_vec2_write(OCLImageTyWrite img, + sycl::vec val) { + __invoke__ImageWrite(img, sycl::int2(0, 0), val); +} + +// Test int8_t fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageFetch +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageFetch +SYCL_EXTERNAL int8_t test_int8_fetch(OCLImageTyRead img) { + return __invoke__ImageFetch(img, sycl::int2(0, 0)); +} + +// Test uint16_t fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageFetch +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageFetch +SYCL_EXTERNAL uint16_t test_uint16_fetch(OCLImageTyRead img) { + return __invoke__ImageFetch(img, sycl::int2(0, 0)); +} + +// Test int8_t sampled fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_sampled_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageFetch +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_SampledImageFetch +SYCL_EXTERNAL int8_t test_int8_sampled_fetch(OCLSampledImageTy img) { + return __invoke__SampledImageFetch(img, sycl::int2(0, 0)); +} + +// Test uint16_t sampled fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_sampled_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageFetch +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_SampledImageFetch +SYCL_EXTERNAL uint16_t test_uint16_sampled_fetch(OCLSampledImageTy img) { + return __invoke__SampledImageFetch(img, sycl::int2(0, 0)); +} + +// Test int8_t array read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_array_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayRead +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageArrayRead +SYCL_EXTERNAL int8_t test_int8_array_read(OCLImageTyRead img) { + return __invoke__ImageArrayRead(img, sycl::int2(0, 0), 0); +} + +// Test uint16_t array read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_array_read +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayRead +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageArrayRead +SYCL_EXTERNAL uint16_t test_uint16_array_read(OCLImageTyRead img) { + return __invoke__ImageArrayRead(img, sycl::int2(0, 0), 0); +} + +// Test int8_t array write - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_array_write +// CHECK: call spir_func void @{{.*}}__spirv_ImageArrayWrite +// CHECK-SAME: <4 x i32> +// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageArrayWrite{{.*}} i8 +SYCL_EXTERNAL void test_int8_array_write(OCLImageTyWrite img, int8_t val) { + __invoke__ImageArrayWrite(img, sycl::int2(0, 0), 0, val); +} + +// Test uint16_t array write - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_array_write +// CHECK: call spir_func void @{{.*}}__spirv_ImageArrayWrite +// CHECK-SAME: <4 x i32> +// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageArrayWrite{{.*}} i16 +SYCL_EXTERNAL void test_uint16_array_write(OCLImageTyWrite img, uint16_t val) { + __invoke__ImageArrayWrite(img, sycl::int2(0, 0), 0, val); +} + +// Test int8_t array fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_array_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayFetch +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageArrayFetch +SYCL_EXTERNAL int8_t test_int8_array_fetch(OCLImageTyRead img) { + return __invoke__ImageArrayFetch(img, sycl::int2(0, 0), 0); +} + +// Test uint16_t array fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_array_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayFetch +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageArrayFetch +SYCL_EXTERNAL uint16_t test_uint16_array_fetch(OCLImageTyRead img) { + return __invoke__ImageArrayFetch(img, sycl::int2(0, 0), 0); +} + +// Test int8_t sampled array fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_sampled_array_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageArrayFetch +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_SampledImageArrayFetch +SYCL_EXTERNAL int8_t test_int8_sampled_array_fetch(OCLSampledImageTy img) { + return __invoke__SampledImageArrayFetch(img, sycl::int2(0, 0), 0); +} + +// Test uint16_t sampled array fetch - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_sampled_array_fetch +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageArrayFetch +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_SampledImageArrayFetch +SYCL_EXTERNAL uint16_t test_uint16_sampled_array_fetch(OCLSampledImageTy img) { + return __invoke__SampledImageArrayFetch(img, sycl::int2(0, 0), 0); +} + +// Test int8_t cubemap read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_cubemap +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleCubemap +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleCubemap +SYCL_EXTERNAL int8_t test_int8_read_cubemap(OCLSampledImageTy img) { + return __invoke__ImageReadCubemap(img, + sycl::float3(0.0f, 0.0f, 0.0f)); +} + +// Test uint16_t cubemap read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_cubemap +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleCubemap +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleCubemap +SYCL_EXTERNAL uint16_t test_uint16_read_cubemap(OCLSampledImageTy img) { + return __invoke__ImageReadCubemap(img, + sycl::float3(0.0f, 0.0f, 0.0f)); +} + +// Test int8_t lod read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_lod +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL int8_t test_int8_read_lod(OCLSampledImageTy img) { + return __invoke__ImageReadLod(img, sycl::float2(0.0f, 0.0f), 0.0f); +} + +// Test uint16_t lod read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_lod +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL uint16_t test_uint16_read_lod(OCLSampledImageTy img) { + return __invoke__ImageReadLod(img, sycl::float2(0.0f, 0.0f), 0.0f); +} + +// Test int8_t grad read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_grad +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL int8_t test_int8_read_grad(OCLSampledImageTy img) { + return __invoke__ImageReadGrad(img, sycl::float2(0.0f, 0.0f), + sycl::float2(0.0f, 0.0f), + sycl::float2(0.0f, 0.0f)); +} + +// Test uint16_t grad read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_grad +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL uint16_t test_uint16_read_grad(OCLSampledImageTy img) { + return __invoke__ImageReadGrad(img, sycl::float2(0.0f, 0.0f), + sycl::float2(0.0f, 0.0f), + sycl::float2(0.0f, 0.0f)); +} + +// Test int8_t sampler read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_sampler +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL int8_t test_int8_read_sampler(OCLImageTyRead img, + const __ocl_sampler_t &smpl) { + return __invoke__ImageReadSampler(img, sycl::float2(0.0f, 0.0f), + smpl); +} + +// Test uint16_t sampler read - should widen to <4 x i32> +// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_sampler +// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod +// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod +SYCL_EXTERNAL uint16_t test_uint16_read_sampler(OCLImageTyRead img, + const __ocl_sampler_t &smpl) { + return __invoke__ImageReadSampler(img, sycl::float2(0.0f, 0.0f), + smpl); +}