From 3ae5a6bf4a69b5b38623906b14451062f19c312b Mon Sep 17 00:00:00 2001
From: Sean Stirling <sstirlin@qti.qualcomm.com>
Date: Tue, 28 Apr 2026 14:39:21 +0100
Subject: [PATCH 1/3] [SYCL] Fix SPIR-V type requirements for image operations

The SPIR-V OpenCL environment spec requires OpImageRead, OpImageWrite,
OpImageFetch, and OpImageSampleExplicitLod to use vec4 operands with
32-bit component types (or 16-bit for half). Channel sizes and narrow
integer types (8-bit and 16-bit) must be widened to their vec4 32-bit
equivalents for the SPIR-V instruction.

All __invoke__Image* functions now use these helpers to ensure correct
SPIR-V type generation.
---
 sycl/include/sycl/detail/image_ocl_types.hpp  | 269 +++++++++++++----
 .../check_device_code/image_spirv_types.cpp   | 281 ++++++++++++++++++
 2 files changed, 495 insertions(+), 55 deletions(-)
 create mode 100644 sycl/test/check_device_code/image_spirv_types.cpp
diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp
index 1880c7e520c54..dcdccb943943a 100644
--- a/sycl/include/sycl/detail/image_ocl_types.hpp
+++ b/sycl/include/sycl/detail/image_ocl_types.hpp
@@ -30,7 +30,6 @@
 
 #include <sycl/access/access.hpp>
 #include <sycl/detail/generic_type_traits.hpp>
-#include <sycl/detail/memcpy.hpp>
 
 #include <sycl/__spirv/spirv_ops_image.hpp>
 
@@ -41,24 +40,112 @@ namespace detail {
 // Type trait to get the associated sampled image type for a given image type.
 template <typename ImageType> struct sampled_opencl_image_type;
 
-// The SPIR-V spec requires the result of OpImageSampleExplicitLod to be a
-// vector type of four components. To satisfy this requirement, we need to use
-// a temporary vector type to hold the result of the SPIR-V call, and then
-// copy the result back to the original return type. The following type trait is
-// used to get the temporary vector type based on the original return type.
-
-template <typename RetType> struct image_sample_explicit_lod_result {
-  using type = sycl::vec<RetType, 4>;
+// Helper to extract element type from scalar or OpenCL vector type
+// (e.g., float or float __attribute__((ext_vector_type(4))))
+template <typename T> struct get_image_element_type { using type = T; };
+template <typename T, int N>
+struct get_image_element_type<T __attribute__((ext_vector_type(N)))> {
+  using type = T;
 };
+template <typename T>
+using get_image_element_type_t = typename get_image_element_type<T>::type;
 
-template <typename ElemT, int N>
-struct image_sample_explicit_lod_result<sycl::vec<ElemT, N>> {
-  using type = sycl::vec<ElemT, 4>;
+// Helper to get number of elements (1 for scalar, N for vector)
+template <typename T> struct get_num_elements {
+  static constexpr int value = 1;
+};
+template <typename T, int N>
+struct get_num_elements<T __attribute__((ext_vector_type(N)))> {
+  static constexpr int value = N;
 };
+template <typename T>
+inline constexpr int get_num_elements_v = get_num_elements<T>::value;
+
+// The OpenCL SPIR-V environment spec requires that OpImageRead, OpImageWrite,
+// OpImageFetch, and OpImageSampleExplicitLod use vec4 operands with 32-bit
+// component types, with the sole exception of half (_Float16) which may use
+// 16-bit components. Channel sizes and narrow integer types (8-bit and 16-bit)
+// must be widened to their vec4 32-bit equivalents.
+template <typename T> struct spirv_image_widened_elem_type { using type = T; };
+template <> struct spirv_image_widened_elem_type<int8_t>   { using type = int32_t; };
+template <> struct spirv_image_widened_elem_type<uint8_t>  { using type = uint32_t; };
+template <> struct spirv_image_widened_elem_type<int16_t>  { using type = int32_t; };
+template <> struct spirv_image_widened_elem_type<uint16_t> { using type = uint32_t; };
+template <typename T>
+using spirv_image_widened_elem_type_t = typename spirv_image_widened_elem_type<T>::type;
+
+// Compile-time verification of type widening rules.
+static_assert(std::is_same_v<spirv_image_widened_elem_type_t<int8_t>, int32_t>);
+static_assert(std::is_same_v<spirv_image_widened_elem_type_t<uint8_t>, uint32_t>);
+static_assert(std::is_same_v<spirv_image_widened_elem_type_t<int16_t>, int32_t>);
+static_assert(std::is_same_v<spirv_image_widened_elem_type_t<uint16_t>, uint32_t>);
+static_assert(std::is_same_v<spirv_image_widened_elem_type_t<int32_t>, int32_t>);
+static_assert(std::is_same_v<spirv_image_widened_elem_type_t<uint32_t>, uint32_t>);
+static_assert(std::is_same_v<spirv_image_widened_elem_type_t<float>, float>);
+static_assert(std::is_same_v<spirv_image_widened_elem_type_t<_Float16>, _Float16>);
+
+// Helper function to convert vec4 result to requested OpenCL type.
+// Handles scalar, vec2, vec3, and vec4 return types.
+template <typename RequestedType, typename Vec4Type>
+static inline constexpr RequestedType convertVec4ToRequestedType(Vec4Type vec4Result) {
+  using ElemType = get_image_element_type_t<RequestedType>;
+  constexpr int NumElements = get_num_elements_v<RequestedType>;
+
+  // Extract components based on RequestedType
+  if constexpr (NumElements == 1) {
+    return static_cast<ElemType>(vec4Result[0]);
+  } else if constexpr (NumElements == 2) {
+    using Vec2Type = ElemType __attribute__((ext_vector_type(2)));
+    Vec2Type result;
+    result[0] = static_cast<ElemType>(vec4Result[0]);
+    result[1] = static_cast<ElemType>(vec4Result[1]);
+    return result;
+  } else if constexpr (NumElements == 3) {
+    using Vec3Type = ElemType __attribute__((ext_vector_type(3)));
+    Vec3Type result;
+    result[0] = static_cast<ElemType>(vec4Result[0]);
+    result[1] = static_cast<ElemType>(vec4Result[1]);
+    result[2] = static_cast<ElemType>(vec4Result[2]);
+    return result;
+  } else {
+    static_assert(NumElements == 4, "Vector size must be 1, 2, 3, or 4");
+    using Vec4NarrowType = ElemType __attribute__((ext_vector_type(4)));
+    Vec4NarrowType result;
+    result[0] = static_cast<ElemType>(vec4Result[0]);
+    result[1] = static_cast<ElemType>(vec4Result[1]);
+    result[2] = static_cast<ElemType>(vec4Result[2]);
+    result[3] = static_cast<ElemType>(vec4Result[3]);
+    return result;
+  }
+}
 
-template <typename RetType>
-using image_sample_explicit_lod_result_t =
-    typename image_sample_explicit_lod_result<RetType>::type;
+// Helper function to convert scalar or OpenCL vector value to vec4 for OpImageWrite.
+template <typename SourceType>
+static inline constexpr auto convertRequestedTypeToVec4(SourceType val) {
+  using RawElemType = get_image_element_type_t<SourceType>;
+  using ElemType = spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+  constexpr int NumElements = get_num_elements_v<SourceType>;
+
+  Vec4Type result{};
+  if constexpr (NumElements == 1) {
+    result[0] = static_cast<ElemType>(val);
+  } else if constexpr (NumElements == 2) {
+    result[0] = static_cast<ElemType>(val[0]);
+    result[1] = static_cast<ElemType>(val[1]);
+  } else if constexpr (NumElements == 3) {
+    result[0] = static_cast<ElemType>(val[0]);
+    result[1] = static_cast<ElemType>(val[1]);
+    result[2] = static_cast<ElemType>(val[2]);
+  } else {
+    static_assert(NumElements == 4, "Vector size must be 1, 2, 3, or 4");
+    result[0] = static_cast<ElemType>(val[0]);
+    result[1] = static_cast<ElemType>(val[1]);
+    result[2] = static_cast<ElemType>(val[2]);
+    result[3] = static_cast<ElemType>(val[3]);
+  }
+  return result;
+}
 
 } // namespace detail
 } // namespace _V1
@@ -84,8 +171,11 @@ static void __invoke__ImageWrite(ImageT Img, CoordT Coords, ValT Val) {
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
   auto TmpVal = sycl::detail::convertToOpenCLType(Val);
 
-  __spirv_ImageWrite<ImageT, decltype(TmpCoords), decltype(TmpVal)>(
-      Img, TmpCoords, TmpVal);
+  // SPIR-V spec requires OpImageWrite texel to be vec4.
+  auto vec4Val = sycl::detail::convertRequestedTypeToVec4(TmpVal);
+
+  __spirv_ImageWrite<ImageT, decltype(TmpCoords), decltype(vec4Val)>(
+      Img, TmpCoords, vec4Val);
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -95,8 +185,16 @@ static RetType __invoke__ImageRead(ImageT Img, CoordT Coords) {
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
+  // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) vec4.
+  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
+  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+
+  Vec4Type vec4Result =
+      __spirv_ImageRead<Vec4Type, ImageT, decltype(TmpCoords)>(Img, TmpCoords);
+
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      __spirv_ImageRead<TempRetT, ImageT, decltype(TmpCoords)>(Img, TmpCoords));
+      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -106,9 +204,17 @@ static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) {
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
+  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4.
+  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
+  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+
+  Vec4Type vec4Result =
+      __spirv_ImageFetch<Vec4Type, ImageT, decltype(TmpCoords)>(Img,
+                                                                TmpCoords);
+
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      __spirv_ImageFetch<TempRetT, ImageT, decltype(TmpCoords)>(Img,
-                                                                TmpCoords));
+      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -118,9 +224,17 @@ static RetType __invoke__SampledImageFetch(ImageT Img, CoordT Coords) {
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
+  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4.
+  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
+  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+
+  Vec4Type vec4Result =
+      __spirv_SampledImageFetch<Vec4Type, ImageT, decltype(TmpCoords)>(
+          Img, TmpCoords);
+
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      __spirv_SampledImageFetch<TempRetT, ImageT, decltype(TmpCoords)>(
-          Img, TmpCoords));
+      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -147,9 +261,17 @@ static RetType __invoke__ImageArrayFetch(ImageT Img, CoordT Coords,
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
+  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4.
+  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
+  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+
+  Vec4Type vec4Result =
+      __spirv_ImageArrayFetch<Vec4Type, ImageT, decltype(TmpCoords)>(
+          Img, TmpCoords, ArrayLayer);
+
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      __spirv_ImageArrayFetch<TempRetT, ImageT, decltype(TmpCoords)>(
-          Img, TmpCoords, ArrayLayer));
+      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -160,9 +282,17 @@ static RetType __invoke__SampledImageArrayFetch(ImageT Img, CoordT Coords,
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
+  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4.
+  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
+  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+
+  Vec4Type vec4Result =
+      __spirv_SampledImageArrayFetch<Vec4Type, ImageT, decltype(TmpCoords)>(
+          Img, TmpCoords, ArrayLayer);
+
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      __spirv_SampledImageArrayFetch<TempRetT, ImageT, decltype(TmpCoords)>(
-          Img, TmpCoords, ArrayLayer));
+      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -173,9 +303,17 @@ static RetType __invoke__ImageArrayRead(ImageT Img, CoordT Coords,
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
+  // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) vec4.
+  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
+  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+
+  Vec4Type vec4Result =
+      __spirv_ImageArrayRead<Vec4Type, ImageT, decltype(TmpCoords)>(
+          Img, TmpCoords, ArrayLayer);
+
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      __spirv_ImageArrayRead<TempRetT, ImageT, decltype(TmpCoords)>(
-          Img, TmpCoords, ArrayLayer));
+      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
 }
 
 template <typename ImageT, typename CoordT, typename ValT>
@@ -186,8 +324,11 @@ static void __invoke__ImageArrayWrite(ImageT Img, CoordT Coords, int ArrayLayer,
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
   auto TmpVal = sycl::detail::convertToOpenCLType(Val);
 
-  __spirv_ImageArrayWrite<ImageT, decltype(TmpCoords), decltype(TmpVal)>(
-      Img, TmpCoords, ArrayLayer, TmpVal);
+  // SPIR-V spec requires OpImageWrite texel to be vec4.
+  auto vec4Val = sycl::detail::convertRequestedTypeToVec4(TmpVal);
+
+  __spirv_ImageArrayWrite<ImageT, decltype(TmpCoords), decltype(vec4Val)>(
+      Img, TmpCoords, ArrayLayer, vec4Val);
 }
 
 template <typename RetType, typename SmpImageT, typename DirVecT>
@@ -197,25 +338,24 @@ static RetType __invoke__ImageReadCubemap(SmpImageT SmpImg, DirVecT DirVec) {
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpDirVec = sycl::detail::convertToOpenCLType(DirVec);
 
+  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4.
+  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
+  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+
+  Vec4Type vec4Result =
+      __spirv_ImageSampleCubemap<SmpImageT, Vec4Type, decltype(TmpDirVec)>(
+          SmpImg, TmpDirVec);
+
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      __spirv_ImageSampleCubemap<SmpImageT, TempRetT, decltype(TmpDirVec)>(
-          SmpImg, TmpDirVec));
+      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
 }
 
 template <typename RetType, typename SmpImageT, typename CoordT>
 static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords,
                                       float Level) {
-  // The result type of the SPIR-V instruction OpImageSampleExplicitLod must be
-  // a vector of four components. Use the type trait to get the appropriate
-  // temporary vector type based on the original return type.
-  using NoRefT = std::remove_reference_t<RetType>;
-  using RetVecType = sycl::detail::image_sample_explicit_lod_result_t<NoRefT>;
-  static_assert(sizeof(RetVecType) >= sizeof(RetType),
-                "RetVecType should be at least as big as RetType to hold the "
-                "result of the SPIR-V call.");
-
   // Convert from sycl types to builtin types to get correct function mangling.
-  using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetVecType>;
+  using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
   enum ImageOperands { Lod = 0x2 };
@@ -226,14 +366,17 @@ static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords,
   // Sampled Image must be an object whose type is OpTypeSampledImage
   // Image Operands encodes what operands follow. Either Lod
   // or Grad image operands must be present
-  auto ResultVec = sycl::detail::convertFromOpenCLTypeFor<RetVecType>(
-      __spirv_ImageSampleExplicitLod<SmpImageT, TempRetT, decltype(TmpCoords)>(
-          SmpImg, TmpCoords, ImageOperands::Lod, Level));
-
-  // Copy the result back to the original return type the user expects.
-  RetType Result;
-  sycl::detail::memcpy_no_adl(&Result, &ResultVec, sizeof(Result));
-  return Result;
+  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4.
+  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
+  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+
+  Vec4Type vec4Result =
+      __spirv_ImageSampleExplicitLod<SmpImageT, Vec4Type, decltype(TmpCoords)>(
+          SmpImg, TmpCoords, ImageOperands::Lod, Level);
+
+  return sycl::detail::convertFromOpenCLTypeFor<RetType>(
+      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
 }
 
 template <typename RetType, typename SmpImageT, typename CoordT>
@@ -254,9 +397,17 @@ static RetType __invoke__ImageReadGrad(SmpImageT SmpImg, CoordT Coords,
   // Sampled Image must be an object whose type is OpTypeSampledImage
   // Image Operands encodes what operands follow. Either Lod
   // or Grad image operands must be present
+  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4.
+  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
+  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+
+  Vec4Type vec4Result =
+      __spirv_ImageSampleExplicitLod<SmpImageT, Vec4Type, decltype(TmpCoords)>(
+          SmpImg, TmpCoords, ImageOperands::Grad, TmpGraddX, TmpGraddY);
+
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      __spirv_ImageSampleExplicitLod<SmpImageT, TempRetT, decltype(TmpCoords)>(
-          SmpImg, TmpCoords, ImageOperands::Grad, TmpGraddX, TmpGraddY));
+      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -281,10 +432,18 @@ static RetType __invoke__ImageReadSampler(ImageT Img, CoordT Coords,
   enum ImageOperands { Lod = 0x2 };
 
   // Lod value is zero as mipmap is not supported.
-  return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      __spirv_ImageSampleExplicitLod<SampledT, TempRetT, decltype(TmpCoords)>(
+  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4.
+  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
+  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
+  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+
+  Vec4Type vec4Result =
+      __spirv_ImageSampleExplicitLod<SampledT, Vec4Type, decltype(TmpCoords)>(
           __spirv_SampledImage<ImageT, SampledT>(Img, Smpl), TmpCoords,
-          ImageOperands::Lod, 0.0f));
+          ImageOperands::Lod, 0.0f);
+
+  return sycl::detail::convertFromOpenCLTypeFor<RetType>(
+      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
 }
 
 namespace sycl {
diff --git a/sycl/test/check_device_code/image_spirv_types.cpp b/sycl/test/check_device_code/image_spirv_types.cpp
new file mode 100644
index 0000000000000..d1922b7e2c1fb
--- /dev/null
+++ b/sycl/test/check_device_code/image_spirv_types.cpp
@@ -0,0 +1,281 @@
+// Check that image operations use supported types by OpenCL SPIR-V env spec.
+//
+// The OpenCL SPIR-V environment spec requires that OpImageRead, OpImageWrite,
+// OpImageFetch, and OpImageSampleExplicitLod use vec4 operands with 32-bit
+// component types (or 16-bit for half). This test verifies that channel sizes
+// and narrow integer types (int8_t, uint8_t, int16_t, uint16_t) are properly
+// widened to vec4 32-bit in the generated SPIR-V calls.
+
+// RUN: %clangxx -O2 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck %s
+
+#include <sycl/sycl.hpp>
+
+using OCLImageTyRead =
+    typename sycl::detail::opencl_image_type<2, sycl::access::mode::read,
+                                             sycl::access::target::image>::type;
+
+using OCLImageTyWrite =
+    typename sycl::detail::opencl_image_type<2, sycl::access::mode::write,
+                                             sycl::access::target::image>::type;
+
+using OCLSampledImageTy =
+    typename sycl::detail::sampled_opencl_image_type<OCLImageTyRead>::type;
+
+// Test int8_t read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageRead
+SYCL_EXTERNAL int8_t test_int8_read(OCLImageTyRead img) {
+  return __invoke__ImageRead<int8_t>(img, sycl::int2(0, 0));
+}
+
+// Test uint8_t read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint8_read
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageRead
+SYCL_EXTERNAL uint8_t test_uint8_read(OCLImageTyRead img) {
+  return __invoke__ImageRead<uint8_t>(img, sycl::int2(0, 0));
+}
+
+// Test int16_t read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int16_read
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageRead
+SYCL_EXTERNAL int16_t test_int16_read(OCLImageTyRead img) {
+  return __invoke__ImageRead<int16_t>(img, sycl::int2(0, 0));
+}
+
+// Test uint16_t read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageRead
+SYCL_EXTERNAL uint16_t test_uint16_read(OCLImageTyRead img) {
+  return __invoke__ImageRead<uint16_t>(img, sycl::int2(0, 0));
+}
+
+// Test sycl::vec<int8_t, 4> read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_vec4_read
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead
+// CHECK-NOT: call {{.*}} <4 x i8> @{{.*}}__spirv_ImageRead
+SYCL_EXTERNAL sycl::vec<int8_t, 4> test_int8_vec4_read(OCLImageTyRead img) {
+  return __invoke__ImageRead<sycl::vec<int8_t, 4>>(img, sycl::int2(0, 0));
+}
+
+// Test float - should NOT widen (already 32-bit)
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_float_read
+// CHECK: call {{.*}} <4 x float> @{{.*}}__spirv_ImageRead
+// CHECK-NOT: call {{.*}} float @{{.*}}__spirv_ImageRead
+SYCL_EXTERNAL float test_float_read(OCLImageTyRead img) {
+  return __invoke__ImageRead<float>(img, sycl::int2(0, 0));
+}
+
+// Test int32_t - should NOT widen (already 32-bit)
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int32_read
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageRead
+// CHECK-NOT: call {{.*}} i32 @{{.*}}__spirv_ImageRead
+SYCL_EXTERNAL int32_t test_int32_read(OCLImageTyRead img) {
+  return __invoke__ImageRead<int32_t>(img, sycl::int2(0, 0));
+}
+
+// Test sycl::half - should NOT widen (16-bit is allowed for half)
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_half_read
+// CHECK: call {{.*}} <4 x half> @{{.*}}__spirv_ImageRead
+// CHECK-NOT: call {{.*}} half @{{.*}}__spirv_ImageRead
+SYCL_EXTERNAL sycl::half test_half_read(OCLImageTyRead img) {
+  return __invoke__ImageRead<sycl::half>(img, sycl::int2(0, 0));
+}
+
+// Test int8_t write - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_write
+// CHECK: call spir_func void @{{.*}}__spirv_ImageWrite
+// CHECK-SAME: <4 x i32>
+// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} i8
+SYCL_EXTERNAL void test_int8_write(OCLImageTyWrite img, int8_t val) {
+  __invoke__ImageWrite(img, sycl::int2(0, 0), val);
+}
+
+// Test uint16_t write - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_write
+// CHECK: call spir_func void @{{.*}}__spirv_ImageWrite
+// CHECK-SAME: <4 x i32>
+// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} i16
+SYCL_EXTERNAL void test_uint16_write(OCLImageTyWrite img, uint16_t val) {
+  __invoke__ImageWrite(img, sycl::int2(0, 0), val);
+}
+
+// Test sycl::vec<uint8_t, 2> write - should widen to <4 x i32> with zero-fill
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint8_vec2_write
+// CHECK: call spir_func void @{{.*}}__spirv_ImageWrite
+// CHECK-SAME: <4 x i32>
+// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} <2 x i8>
+SYCL_EXTERNAL void test_uint8_vec2_write(OCLImageTyWrite img, sycl::vec<uint8_t, 2> val) {
+  __invoke__ImageWrite(img, sycl::int2(0, 0), val);
+}
+
+// Test int8_t fetch - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_fetch
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageFetch
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageFetch
+SYCL_EXTERNAL int8_t test_int8_fetch(OCLImageTyRead img) {
+  return __invoke__ImageFetch<int8_t>(img, sycl::int2(0, 0));
+}
+
+// Test uint16_t fetch - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_fetch
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageFetch
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageFetch
+SYCL_EXTERNAL uint16_t test_uint16_fetch(OCLImageTyRead img) {
+  return __invoke__ImageFetch<uint16_t>(img, sycl::int2(0, 0));
+}
+
+// Test int8_t sampled fetch - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_sampled_fetch
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageFetch
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_SampledImageFetch
+SYCL_EXTERNAL int8_t test_int8_sampled_fetch(OCLSampledImageTy img) {
+  return __invoke__SampledImageFetch<int8_t>(img, sycl::int2(0, 0));
+}
+
+// Test uint16_t sampled fetch - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_sampled_fetch
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageFetch
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_SampledImageFetch
+SYCL_EXTERNAL uint16_t test_uint16_sampled_fetch(OCLSampledImageTy img) {
+  return __invoke__SampledImageFetch<uint16_t>(img, sycl::int2(0, 0));
+}
+
+// Test int8_t array read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_array_read
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayRead
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageArrayRead
+SYCL_EXTERNAL int8_t test_int8_array_read(OCLImageTyRead img) {
+  return __invoke__ImageArrayRead<int8_t>(img, sycl::int2(0, 0), 0);
+}
+
+// Test uint16_t array read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_array_read
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayRead
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageArrayRead
+SYCL_EXTERNAL uint16_t test_uint16_array_read(OCLImageTyRead img) {
+  return __invoke__ImageArrayRead<uint16_t>(img, sycl::int2(0, 0), 0);
+}
+
+// Test int8_t array write - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_array_write
+// CHECK: call spir_func void @{{.*}}__spirv_ImageArrayWrite
+// CHECK-SAME: <4 x i32>
+// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageArrayWrite{{.*}} i8
+SYCL_EXTERNAL void test_int8_array_write(OCLImageTyWrite img, int8_t val) {
+  __invoke__ImageArrayWrite(img, sycl::int2(0, 0), 0, val);
+}
+
+// Test uint16_t array write - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_array_write
+// CHECK: call spir_func void @{{.*}}__spirv_ImageArrayWrite
+// CHECK-SAME: <4 x i32>
+// CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageArrayWrite{{.*}} i16
+SYCL_EXTERNAL void test_uint16_array_write(OCLImageTyWrite img, uint16_t val) {
+  __invoke__ImageArrayWrite(img, sycl::int2(0, 0), 0, val);
+}
+
+// Test int8_t array fetch - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_array_fetch
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayFetch
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageArrayFetch
+SYCL_EXTERNAL int8_t test_int8_array_fetch(OCLImageTyRead img) {
+  return __invoke__ImageArrayFetch<int8_t>(img, sycl::int2(0, 0), 0);
+}
+
+// Test uint16_t array fetch - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_array_fetch
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageArrayFetch
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageArrayFetch
+SYCL_EXTERNAL uint16_t test_uint16_array_fetch(OCLImageTyRead img) {
+  return __invoke__ImageArrayFetch<uint16_t>(img, sycl::int2(0, 0), 0);
+}
+
+// Test int8_t sampled array fetch - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_sampled_array_fetch
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageArrayFetch
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_SampledImageArrayFetch
+SYCL_EXTERNAL int8_t test_int8_sampled_array_fetch(OCLSampledImageTy img) {
+  return __invoke__SampledImageArrayFetch<int8_t>(img, sycl::int2(0, 0), 0);
+}
+
+// Test uint16_t sampled array fetch - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_sampled_array_fetch
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_SampledImageArrayFetch
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_SampledImageArrayFetch
+SYCL_EXTERNAL uint16_t test_uint16_sampled_array_fetch(OCLSampledImageTy img) {
+  return __invoke__SampledImageArrayFetch<uint16_t>(img, sycl::int2(0, 0), 0);
+}
+
+// Test int8_t cubemap read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_cubemap
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleCubemap
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleCubemap
+SYCL_EXTERNAL int8_t test_int8_read_cubemap(OCLSampledImageTy img) {
+  return __invoke__ImageReadCubemap<int8_t>(img, sycl::float3(0.0f, 0.0f, 0.0f));
+}
+
+// Test uint16_t cubemap read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_cubemap
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleCubemap
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleCubemap
+SYCL_EXTERNAL uint16_t test_uint16_read_cubemap(OCLSampledImageTy img) {
+  return __invoke__ImageReadCubemap<uint16_t>(img, sycl::float3(0.0f, 0.0f, 0.0f));
+}
+
+// Test int8_t lod read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_lod
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod
+SYCL_EXTERNAL int8_t test_int8_read_lod(OCLSampledImageTy img) {
+  return __invoke__ImageReadLod<int8_t>(img, sycl::float2(0.0f, 0.0f), 0.0f);
+}
+
+// Test uint16_t lod read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_lod
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod
+SYCL_EXTERNAL uint16_t test_uint16_read_lod(OCLSampledImageTy img) {
+  return __invoke__ImageReadLod<uint16_t>(img, sycl::float2(0.0f, 0.0f), 0.0f);
+}
+
+// Test int8_t grad read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_grad
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod
+SYCL_EXTERNAL int8_t test_int8_read_grad(OCLSampledImageTy img) {
+  return __invoke__ImageReadGrad<int8_t>(img, sycl::float2(0.0f, 0.0f),
+                                         sycl::float2(0.0f, 0.0f),
+                                         sycl::float2(0.0f, 0.0f));
+}
+
+// Test uint16_t grad read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_grad
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod
+SYCL_EXTERNAL uint16_t test_uint16_read_grad(OCLSampledImageTy img) {
+  return __invoke__ImageReadGrad<uint16_t>(img, sycl::float2(0.0f, 0.0f),
+                                            sycl::float2(0.0f, 0.0f),
+                                            sycl::float2(0.0f, 0.0f));
+}
+
+// Test int8_t sampler read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_int8_read_sampler
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod
+// CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod
+SYCL_EXTERNAL int8_t test_int8_read_sampler(OCLImageTyRead img,
+                                             const __ocl_sampler_t &smpl) {
+  return __invoke__ImageReadSampler<int8_t>(img, sycl::float2(0.0f, 0.0f), smpl);
+}
+
+// Test uint16_t sampler read - should widen to <4 x i32>
+// CHECK-LABEL: define {{.*}} @_Z{{.*}}test_uint16_read_sampler
+// CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod
+// CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod
+SYCL_EXTERNAL uint16_t test_uint16_read_sampler(OCLImageTyRead img,
+                                                 const __ocl_sampler_t &smpl) {
+  return __invoke__ImageReadSampler<uint16_t>(img, sycl::float2(0.0f, 0.0f), smpl);
+}

From c01ad0f5f9054da3038c593b3b5ac59f0f78d15c Mon Sep 17 00:00:00 2001
From: Sean Stirling <sstirlin@qti.qualcomm.com>
Date: Tue, 5 May 2026 16:08:08 +0100
Subject: [PATCH 2/3] Remove static_asserts

---
 sycl/include/sycl/detail/image_ocl_types.hpp | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp
index dcdccb943943a..ac4af0e7ca7f3 100644
--- a/sycl/include/sycl/detail/image_ocl_types.hpp
+++ b/sycl/include/sycl/detail/image_ocl_types.hpp
@@ -74,16 +74,6 @@ template <> struct spirv_image_widened_elem_type<uint16_t> { using type = uint32
 template <typename T>
 using spirv_image_widened_elem_type_t = typename spirv_image_widened_elem_type<T>::type;
 
-// Compile-time verification of type widening rules.
-static_assert(std::is_same_v<spirv_image_widened_elem_type_t<int8_t>, int32_t>);
-static_assert(std::is_same_v<spirv_image_widened_elem_type_t<uint8_t>, uint32_t>);
-static_assert(std::is_same_v<spirv_image_widened_elem_type_t<int16_t>, int32_t>);
-static_assert(std::is_same_v<spirv_image_widened_elem_type_t<uint16_t>, uint32_t>);
-static_assert(std::is_same_v<spirv_image_widened_elem_type_t<int32_t>, int32_t>);
-static_assert(std::is_same_v<spirv_image_widened_elem_type_t<uint32_t>, uint32_t>);
-static_assert(std::is_same_v<spirv_image_widened_elem_type_t<float>, float>);
-static_assert(std::is_same_v<spirv_image_widened_elem_type_t<_Float16>, _Float16>);
-
 // Helper function to convert vec4 result to requested OpenCL type.
 // Handles scalar, vec2, vec3, and vec4 return types.
 template <typename RequestedType, typename Vec4Type>

From 229491e26d0a7bb819c69aea67433cdcd5dc849c Mon Sep 17 00:00:00 2001
From: Sean Stirling <sstirlin@qti.qualcomm.com>
Date: Thu, 28 May 2026 16:11:29 +0100
Subject: [PATCH 3/3] Guard NVPTX and AMDGCN

---
 sycl/include/sycl/detail/image_ocl_types.hpp  | 265 ++++++++++--------
 .../check_device_code/image_spirv_types.cpp   |  23 +-
 2 files changed, 161 insertions(+), 127 deletions(-)

diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp
index ac4af0e7ca7f3..7d435154ed3bf 100644
--- a/sycl/include/sycl/detail/image_ocl_types.hpp
+++ b/sycl/include/sycl/detail/image_ocl_types.hpp
@@ -42,7 +42,9 @@ template <typename ImageType> struct sampled_opencl_image_type;
 
 // Helper to extract element type from scalar or OpenCL vector type
 // (e.g., float or float __attribute__((ext_vector_type(4))))
-template <typename T> struct get_image_element_type { using type = T; };
+template <typename T> struct get_image_element_type {
+  using type = T;
+};
 template <typename T, int N>
 struct get_image_element_type<T __attribute__((ext_vector_type(N)))> {
   using type = T;
@@ -66,59 +68,108 @@ inline constexpr int get_num_elements_v = get_num_elements<T>::value;
 // component types, with the sole exception of half (_Float16) which may use
 // 16-bit components. Channel sizes and narrow integer types (8-bit and 16-bit)
 // must be widened to their vec4 32-bit equivalents.
-template <typename T> struct spirv_image_widened_elem_type { using type = T; };
-template <> struct spirv_image_widened_elem_type<int8_t>   { using type = int32_t; };
-template <> struct spirv_image_widened_elem_type<uint8_t>  { using type = uint32_t; };
-template <> struct spirv_image_widened_elem_type<int16_t>  { using type = int32_t; };
-template <> struct spirv_image_widened_elem_type<uint16_t> { using type = uint32_t; };
+//
+// On NVPTX and AMDGCN, the __spirv_Image* builtins are implemented directly in
+// libclc with byte strides taken into account. Widening to 32-bit or vec4
+// selects the wrong libclc overload. On those targets spirv_image_call_type_t
+// leaves the type unchanged.
+#if !defined(__NVPTX__) && !defined(__AMDGCN__)
+template <typename T> struct spirv_image_widened_elem_type {
+  using type = T;
+};
+template <> struct spirv_image_widened_elem_type<int8_t> {
+  using type = int32_t;
+};
+template <> struct spirv_image_widened_elem_type<uint8_t> {
+  using type = uint32_t;
+};
+template <> struct spirv_image_widened_elem_type<int16_t> {
+  using type = int32_t;
+};
+template <> struct spirv_image_widened_elem_type<uint16_t> {
+  using type = uint32_t;
+};
+template <typename T>
+using spirv_image_widened_elem_type_t =
+    typename spirv_image_widened_elem_type<T>::type;
+#endif
+
+// spirv_image_call_type_t<T>: the type to use for __spirv_Image* read/write
+// calls. On SPIR-V/OpenCL: widened elem type forced to vec4. On NVPTX/AMDGCN: T
+// unchanged (libclc has native-typed overloads).
+#if defined(__NVPTX__) || defined(__AMDGCN__)
+template <typename T> struct spirv_image_call_type {
+  using type = T;
+};
+#else
+template <typename T> struct spirv_image_call_type {
+  using ElemT = spirv_image_widened_elem_type_t<get_image_element_type_t<T>>;
+  using type = ElemT __attribute__((ext_vector_type(4)));
+};
+#endif
 template <typename T>
-using spirv_image_widened_elem_type_t = typename spirv_image_widened_elem_type<T>::type;
+using spirv_image_call_type_t = typename spirv_image_call_type<T>::type;
+
+// Helper function to convert the result of a __spirv_Image* call back to the
+// requested OpenCL type.
+// On SPIR-V/OpenCL the call returns a widened vec4.
+// On NVPTX/AMDGCN CallT == RequestedType so this reduces to identity casts.
+template <typename RequestedType, typename CallT>
+static inline constexpr RequestedType
+convertImageCallResultToOpenCLType(CallT callResult) {
+  if constexpr (std::is_same_v<RequestedType, CallT>)
+    return callResult;
 
-// Helper function to convert vec4 result to requested OpenCL type.
-// Handles scalar, vec2, vec3, and vec4 return types.
-template <typename RequestedType, typename Vec4Type>
-static inline constexpr RequestedType convertVec4ToRequestedType(Vec4Type vec4Result) {
   using ElemType = get_image_element_type_t<RequestedType>;
   constexpr int NumElements = get_num_elements_v<RequestedType>;
 
-  // Extract components based on RequestedType
   if constexpr (NumElements == 1) {
-    return static_cast<ElemType>(vec4Result[0]);
+    if constexpr (get_num_elements_v<CallT> == 1)
+      return static_cast<ElemType>(callResult);
+    else
+      return static_cast<ElemType>(callResult[0]);
   } else if constexpr (NumElements == 2) {
     using Vec2Type = ElemType __attribute__((ext_vector_type(2)));
     Vec2Type result;
-    result[0] = static_cast<ElemType>(vec4Result[0]);
-    result[1] = static_cast<ElemType>(vec4Result[1]);
+    result[0] = static_cast<ElemType>(callResult[0]);
+    result[1] = static_cast<ElemType>(callResult[1]);
     return result;
   } else if constexpr (NumElements == 3) {
     using Vec3Type = ElemType __attribute__((ext_vector_type(3)));
     Vec3Type result;
-    result[0] = static_cast<ElemType>(vec4Result[0]);
-    result[1] = static_cast<ElemType>(vec4Result[1]);
-    result[2] = static_cast<ElemType>(vec4Result[2]);
+    result[0] = static_cast<ElemType>(callResult[0]);
+    result[1] = static_cast<ElemType>(callResult[1]);
+    result[2] = static_cast<ElemType>(callResult[2]);
     return result;
   } else {
     static_assert(NumElements == 4, "Vector size must be 1, 2, 3, or 4");
     using Vec4NarrowType = ElemType __attribute__((ext_vector_type(4)));
     Vec4NarrowType result;
-    result[0] = static_cast<ElemType>(vec4Result[0]);
-    result[1] = static_cast<ElemType>(vec4Result[1]);
-    result[2] = static_cast<ElemType>(vec4Result[2]);
-    result[3] = static_cast<ElemType>(vec4Result[3]);
+    result[0] = static_cast<ElemType>(callResult[0]);
+    result[1] = static_cast<ElemType>(callResult[1]);
+    result[2] = static_cast<ElemType>(callResult[2]);
+    result[3] = static_cast<ElemType>(callResult[3]);
     return result;
   }
 }
 
-// Helper function to convert scalar or OpenCL vector value to vec4 for OpImageWrite.
+// Helper function to convert scalar or OpenCL vector value to the appropriate
+// call type for __spirv_Image* write.
+// On SPIR-V/OpenCL this widens to vec4.
+// on NVPTX/AMDGCN CallT == SourceType so this reduces to identity casts.
 template <typename SourceType>
-static inline constexpr auto convertRequestedTypeToVec4(SourceType val) {
-  using RawElemType = get_image_element_type_t<SourceType>;
-  using ElemType = spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
+static inline constexpr auto convertOpenCLTypeToImageCallType(SourceType val) {
+  using CallT = spirv_image_call_type_t<SourceType>;
+  if constexpr (std::is_same_v<SourceType, CallT>)
+    return val;
+
+  using ElemType = get_image_element_type_t<CallT>;
   constexpr int NumElements = get_num_elements_v<SourceType>;
 
-  Vec4Type result{};
-  if constexpr (NumElements == 1) {
+  CallT result{};
+  if constexpr (get_num_elements_v<CallT> == 1) {
+    result = static_cast<ElemType>(val);
+  } else if constexpr (NumElements == 1) {
     result[0] = static_cast<ElemType>(val);
   } else if constexpr (NumElements == 2) {
     result[0] = static_cast<ElemType>(val[0]);
@@ -162,10 +213,10 @@ static void __invoke__ImageWrite(ImageT Img, CoordT Coords, ValT Val) {
   auto TmpVal = sycl::detail::convertToOpenCLType(Val);
 
   // SPIR-V spec requires OpImageWrite texel to be vec4.
-  auto vec4Val = sycl::detail::convertRequestedTypeToVec4(TmpVal);
+  auto callVal = sycl::detail::convertOpenCLTypeToImageCallType(TmpVal);
 
-  __spirv_ImageWrite<ImageT, decltype(TmpCoords), decltype(vec4Val)>(
-      Img, TmpCoords, vec4Val);
+  __spirv_ImageWrite<ImageT, decltype(TmpCoords), decltype(callVal)>(
+      Img, TmpCoords, callVal);
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -175,16 +226,14 @@ static RetType __invoke__ImageRead(ImageT Img, CoordT Coords) {
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
-  // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) vec4.
-  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
-  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
-
-  Vec4Type vec4Result =
-      __spirv_ImageRead<Vec4Type, ImageT, decltype(TmpCoords)>(Img, TmpCoords);
+  // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half)
+  // vec4.
+  using CallT = sycl::detail::spirv_image_call_type_t<TempRetT>;
+  CallT callResult =
+      __spirv_ImageRead<CallT, ImageT, decltype(TmpCoords)>(Img, TmpCoords);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
+      sycl::detail::convertImageCallResultToOpenCLType<TempRetT>(callResult));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -194,17 +243,14 @@ static RetType __invoke__ImageFetch(ImageT Img, CoordT Coords) {
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
-  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4.
-  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
-  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
-
-  Vec4Type vec4Result =
-      __spirv_ImageFetch<Vec4Type, ImageT, decltype(TmpCoords)>(Img,
-                                                                TmpCoords);
+  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half)
+  // vec4.
+  using CallT = sycl::detail::spirv_image_call_type_t<TempRetT>;
+  CallT callResult =
+      __spirv_ImageFetch<CallT, ImageT, decltype(TmpCoords)>(Img, TmpCoords);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
+      sycl::detail::convertImageCallResultToOpenCLType<TempRetT>(callResult));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -214,17 +260,15 @@ static RetType __invoke__SampledImageFetch(ImageT Img, CoordT Coords) {
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
-  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4.
-  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
-  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
-
-  Vec4Type vec4Result =
-      __spirv_SampledImageFetch<Vec4Type, ImageT, decltype(TmpCoords)>(
-          Img, TmpCoords);
+  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half)
+  // vec4.
+  using CallT = sycl::detail::spirv_image_call_type_t<TempRetT>;
+  CallT callResult =
+      __spirv_SampledImageFetch<CallT, ImageT, decltype(TmpCoords)>(Img,
+                                                                    TmpCoords);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
+      sycl::detail::convertImageCallResultToOpenCLType<TempRetT>(callResult));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -251,17 +295,15 @@ static RetType __invoke__ImageArrayFetch(ImageT Img, CoordT Coords,
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
-  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4.
-  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
-  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
-
-  Vec4Type vec4Result =
-      __spirv_ImageArrayFetch<Vec4Type, ImageT, decltype(TmpCoords)>(
+  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half)
+  // vec4.
+  using CallT = sycl::detail::spirv_image_call_type_t<TempRetT>;
+  CallT callResult =
+      __spirv_ImageArrayFetch<CallT, ImageT, decltype(TmpCoords)>(
           Img, TmpCoords, ArrayLayer);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
+      sycl::detail::convertImageCallResultToOpenCLType<TempRetT>(callResult));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -272,17 +314,15 @@ static RetType __invoke__SampledImageArrayFetch(ImageT Img, CoordT Coords,
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
-  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half) vec4.
-  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
-  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
-
-  Vec4Type vec4Result =
-      __spirv_SampledImageArrayFetch<Vec4Type, ImageT, decltype(TmpCoords)>(
+  // SPIR-V spec requires OpImageFetch to return a 32-bit (or 16-bit for half)
+  // vec4.
+  using CallT = sycl::detail::spirv_image_call_type_t<TempRetT>;
+  CallT callResult =
+      __spirv_SampledImageArrayFetch<CallT, ImageT, decltype(TmpCoords)>(
           Img, TmpCoords, ArrayLayer);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
+      sycl::detail::convertImageCallResultToOpenCLType<TempRetT>(callResult));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -293,17 +333,14 @@ static RetType __invoke__ImageArrayRead(ImageT Img, CoordT Coords,
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpCoords = sycl::detail::convertToOpenCLType(Coords);
 
-  // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half) vec4.
-  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
-  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
-
-  Vec4Type vec4Result =
-      __spirv_ImageArrayRead<Vec4Type, ImageT, decltype(TmpCoords)>(
-          Img, TmpCoords, ArrayLayer);
+  // SPIR-V spec requires OpImageRead to return a 32-bit (or 16-bit for half)
+  // vec4.
+  using CallT = sycl::detail::spirv_image_call_type_t<TempRetT>;
+  CallT callResult = __spirv_ImageArrayRead<CallT, ImageT, decltype(TmpCoords)>(
+      Img, TmpCoords, ArrayLayer);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
+      sycl::detail::convertImageCallResultToOpenCLType<TempRetT>(callResult));
 }
 
 template <typename ImageT, typename CoordT, typename ValT>
@@ -315,10 +352,10 @@ static void __invoke__ImageArrayWrite(ImageT Img, CoordT Coords, int ArrayLayer,
   auto TmpVal = sycl::detail::convertToOpenCLType(Val);
 
   // SPIR-V spec requires OpImageWrite texel to be vec4.
-  auto vec4Val = sycl::detail::convertRequestedTypeToVec4(TmpVal);
+  auto callVal = sycl::detail::convertOpenCLTypeToImageCallType(TmpVal);
 
-  __spirv_ImageArrayWrite<ImageT, decltype(TmpCoords), decltype(vec4Val)>(
-      Img, TmpCoords, ArrayLayer, vec4Val);
+  __spirv_ImageArrayWrite<ImageT, decltype(TmpCoords), decltype(callVal)>(
+      Img, TmpCoords, ArrayLayer, callVal);
 }
 
 template <typename RetType, typename SmpImageT, typename DirVecT>
@@ -328,17 +365,15 @@ static RetType __invoke__ImageReadCubemap(SmpImageT SmpImg, DirVecT DirVec) {
   using TempRetT = sycl::detail::ConvertToOpenCLType_t<RetType>;
   auto TmpDirVec = sycl::detail::convertToOpenCLType(DirVec);
 
-  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4.
-  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
-  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
-
-  Vec4Type vec4Result =
-      __spirv_ImageSampleCubemap<SmpImageT, Vec4Type, decltype(TmpDirVec)>(
+  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit
+  // for half) vec4.
+  using CallT = sycl::detail::spirv_image_call_type_t<TempRetT>;
+  CallT callResult =
+      __spirv_ImageSampleCubemap<SmpImageT, CallT, decltype(TmpDirVec)>(
           SmpImg, TmpDirVec);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
+      sycl::detail::convertImageCallResultToOpenCLType<TempRetT>(callResult));
 }
 
 template <typename RetType, typename SmpImageT, typename CoordT>
@@ -356,17 +391,15 @@ static RetType __invoke__ImageReadLod(SmpImageT SmpImg, CoordT Coords,
   // Sampled Image must be an object whose type is OpTypeSampledImage
   // Image Operands encodes what operands follow. Either Lod
   // or Grad image operands must be present
-  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4.
-  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
-  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
-
-  Vec4Type vec4Result =
-      __spirv_ImageSampleExplicitLod<SmpImageT, Vec4Type, decltype(TmpCoords)>(
+  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit
+  // for half) vec4.
+  using CallT = sycl::detail::spirv_image_call_type_t<TempRetT>;
+  CallT callResult =
+      __spirv_ImageSampleExplicitLod<SmpImageT, CallT, decltype(TmpCoords)>(
           SmpImg, TmpCoords, ImageOperands::Lod, Level);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
+      sycl::detail::convertImageCallResultToOpenCLType<TempRetT>(callResult));
 }
 
 template <typename RetType, typename SmpImageT, typename CoordT>
@@ -387,17 +420,15 @@ static RetType __invoke__ImageReadGrad(SmpImageT SmpImg, CoordT Coords,
   // Sampled Image must be an object whose type is OpTypeSampledImage
   // Image Operands encodes what operands follow. Either Lod
   // or Grad image operands must be present
-  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4.
-  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
-  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
-
-  Vec4Type vec4Result =
-      __spirv_ImageSampleExplicitLod<SmpImageT, Vec4Type, decltype(TmpCoords)>(
+  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit
+  // for half) vec4.
+  using CallT = sycl::detail::spirv_image_call_type_t<TempRetT>;
+  CallT callResult =
+      __spirv_ImageSampleExplicitLod<SmpImageT, CallT, decltype(TmpCoords)>(
           SmpImg, TmpCoords, ImageOperands::Grad, TmpGraddX, TmpGraddY);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
+      sycl::detail::convertImageCallResultToOpenCLType<TempRetT>(callResult));
 }
 
 template <typename RetType, typename ImageT, typename CoordT>
@@ -422,18 +453,16 @@ static RetType __invoke__ImageReadSampler(ImageT Img, CoordT Coords,
   enum ImageOperands { Lod = 0x2 };
 
   // Lod value is zero as mipmap is not supported.
-  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit for half) vec4.
-  using RawElemType = sycl::detail::get_image_element_type_t<TempRetT>;
-  using ElemType = sycl::detail::spirv_image_widened_elem_type_t<RawElemType>;
-  using Vec4Type = ElemType __attribute__((ext_vector_type(4)));
-
-  Vec4Type vec4Result =
-      __spirv_ImageSampleExplicitLod<SampledT, Vec4Type, decltype(TmpCoords)>(
+  // SPIR-V spec requires OpImageSampleExplicitLod to return a 32-bit (or 16-bit
+  // for half) vec4.
+  using CallT = sycl::detail::spirv_image_call_type_t<TempRetT>;
+  CallT callResult =
+      __spirv_ImageSampleExplicitLod<SampledT, CallT, decltype(TmpCoords)>(
           __spirv_SampledImage<ImageT, SampledT>(Img, Smpl), TmpCoords,
           ImageOperands::Lod, 0.0f);
 
   return sycl::detail::convertFromOpenCLTypeFor<RetType>(
-      sycl::detail::convertVec4ToRequestedType<TempRetT>(vec4Result));
+      sycl::detail::convertImageCallResultToOpenCLType<TempRetT>(callResult));
 }
 
 namespace sycl {
diff --git a/sycl/test/check_device_code/image_spirv_types.cpp b/sycl/test/check_device_code/image_spirv_types.cpp
index d1922b7e2c1fb..d9b854728b8cc 100644
--- a/sycl/test/check_device_code/image_spirv_types.cpp
+++ b/sycl/test/check_device_code/image_spirv_types.cpp
@@ -108,7 +108,8 @@ SYCL_EXTERNAL void test_uint16_write(OCLImageTyWrite img, uint16_t val) {
 // CHECK: call spir_func void @{{.*}}__spirv_ImageWrite
 // CHECK-SAME: <4 x i32>
 // CHECK-NOT: call spir_func void @{{.*}}__spirv_ImageWrite{{.*}} <2 x i8>
-SYCL_EXTERNAL void test_uint8_vec2_write(OCLImageTyWrite img, sycl::vec<uint8_t, 2> val) {
+SYCL_EXTERNAL void test_uint8_vec2_write(OCLImageTyWrite img,
+                                         sycl::vec<uint8_t, 2> val) {
   __invoke__ImageWrite(img, sycl::int2(0, 0), val);
 }
 
@@ -215,7 +216,8 @@ SYCL_EXTERNAL uint16_t test_uint16_sampled_array_fetch(OCLSampledImageTy img) {
 // CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleCubemap
 // CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleCubemap
 SYCL_EXTERNAL int8_t test_int8_read_cubemap(OCLSampledImageTy img) {
-  return __invoke__ImageReadCubemap<int8_t>(img, sycl::float3(0.0f, 0.0f, 0.0f));
+  return __invoke__ImageReadCubemap<int8_t>(img,
+                                            sycl::float3(0.0f, 0.0f, 0.0f));
 }
 
 // Test uint16_t cubemap read - should widen to <4 x i32>
@@ -223,7 +225,8 @@ SYCL_EXTERNAL int8_t test_int8_read_cubemap(OCLSampledImageTy img) {
 // CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleCubemap
 // CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleCubemap
 SYCL_EXTERNAL uint16_t test_uint16_read_cubemap(OCLSampledImageTy img) {
-  return __invoke__ImageReadCubemap<uint16_t>(img, sycl::float3(0.0f, 0.0f, 0.0f));
+  return __invoke__ImageReadCubemap<uint16_t>(img,
+                                              sycl::float3(0.0f, 0.0f, 0.0f));
 }
 
 // Test int8_t lod read - should widen to <4 x i32>
@@ -258,8 +261,8 @@ SYCL_EXTERNAL int8_t test_int8_read_grad(OCLSampledImageTy img) {
 // CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod
 SYCL_EXTERNAL uint16_t test_uint16_read_grad(OCLSampledImageTy img) {
   return __invoke__ImageReadGrad<uint16_t>(img, sycl::float2(0.0f, 0.0f),
-                                            sycl::float2(0.0f, 0.0f),
-                                            sycl::float2(0.0f, 0.0f));
+                                           sycl::float2(0.0f, 0.0f),
+                                           sycl::float2(0.0f, 0.0f));
 }
 
 // Test int8_t sampler read - should widen to <4 x i32>
@@ -267,8 +270,9 @@ SYCL_EXTERNAL uint16_t test_uint16_read_grad(OCLSampledImageTy img) {
 // CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod
 // CHECK-NOT: call {{.*}} i8 @{{.*}}__spirv_ImageSampleExplicitLod
 SYCL_EXTERNAL int8_t test_int8_read_sampler(OCLImageTyRead img,
-                                             const __ocl_sampler_t &smpl) {
-  return __invoke__ImageReadSampler<int8_t>(img, sycl::float2(0.0f, 0.0f), smpl);
+                                            const __ocl_sampler_t &smpl) {
+  return __invoke__ImageReadSampler<int8_t>(img, sycl::float2(0.0f, 0.0f),
+                                            smpl);
 }
 
 // Test uint16_t sampler read - should widen to <4 x i32>
@@ -276,6 +280,7 @@ SYCL_EXTERNAL int8_t test_int8_read_sampler(OCLImageTyRead img,
 // CHECK: call {{.*}} <4 x i32> @{{.*}}__spirv_ImageSampleExplicitLod
 // CHECK-NOT: call {{.*}} i16 @{{.*}}__spirv_ImageSampleExplicitLod
 SYCL_EXTERNAL uint16_t test_uint16_read_sampler(OCLImageTyRead img,
-                                                 const __ocl_sampler_t &smpl) {
-  return __invoke__ImageReadSampler<uint16_t>(img, sycl::float2(0.0f, 0.0f), smpl);
+                                                const __ocl_sampler_t &smpl) {
+  return __invoke__ImageReadSampler<uint16_t>(img, sycl::float2(0.0f, 0.0f),
+                                              smpl);
 }