From 0086e37ff907f2511b11be7609fc2348c85d0a41 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 23 Jan 2026 03:56:12 -0800 Subject: [PATCH 1/5] Remove cython backend impl of putmask --- dpnp/dpnp_algo/CMakeLists.txt | 5 +-- dpnp/dpnp_algo/dpnp_algo.pyx | 1 - dpnp/dpnp_algo/dpnp_algo_indexing.pxi | 54 --------------------------- 3 files changed, 1 insertion(+), 59 deletions(-) delete mode 100644 dpnp/dpnp_algo/dpnp_algo_indexing.pxi diff --git a/dpnp/dpnp_algo/CMakeLists.txt b/dpnp/dpnp_algo/CMakeLists.txt index c18a3dc8eac9..3f8196bc64f8 100644 --- a/dpnp/dpnp_algo/CMakeLists.txt +++ b/dpnp/dpnp_algo/CMakeLists.txt @@ -27,10 +27,7 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** -set(dpnp_algo_pyx_deps - ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_sorting.pxi - ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_indexing.pxi -) +set(dpnp_algo_pyx_deps ${CMAKE_CURRENT_SOURCE_DIR}/dpnp_algo_sorting.pxi) build_dpnp_cython_ext_with_backend( dpnp_algo diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 53abcad11986..2946445f9a50 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -48,7 +48,6 @@ __all__ = [ ] -include "dpnp_algo_indexing.pxi" include "dpnp_algo_sorting.pxi" diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi deleted file mode 100644 index 54ed3e99fba8..000000000000 --- a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi +++ /dev/null @@ -1,54 +0,0 @@ -# cython: language_level=3 -# cython: linetrace=True -# -*- coding: utf-8 -*- -# ***************************************************************************** -# Copyright (c) 2016, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# - Neither the name of the copyright holder nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# ***************************************************************************** - -"""Module Backend (Indexing part) - -This module contains interface functions between C backend layer -and the rest of the library - -""" - -# NO IMPORTs here. 
All imports must be placed into main "dpnp_algo.pyx" file - -__all__ += [ - "dpnp_putmask", -] - -cpdef dpnp_putmask(utils.dpnp_descriptor arr, utils.dpnp_descriptor mask, utils.dpnp_descriptor values): - cdef int values_size = values.size - - mask_flatiter = mask.get_pyobj().flat - arr_flatiter = arr.get_pyobj().flat - values_flatiter = values.get_pyobj().flat - - for i in range(arr.size): - if mask_flatiter[i]: - arr_flatiter[i] = values_flatiter[i % values_size] From cc2c154513f0dab6243c005e322e9c2937cc80cc Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 23 Jan 2026 04:06:05 -0800 Subject: [PATCH 2/5] Add C-contig implementation of putmask --- .../extensions/indexing/CMakeLists.txt | 1 + .../extensions/indexing/indexing_py.cpp | 2 + dpnp/backend/extensions/indexing/putmask.cpp | 223 +++++++++++++++ dpnp/backend/extensions/indexing/putmask.hpp | 38 +++ .../extensions/indexing/putmask_kernel.hpp | 257 ++++++++++++++++++ dpnp/dpnp_iface_indexing.py | 91 +++++-- 6 files changed, 592 insertions(+), 20 deletions(-) create mode 100644 dpnp/backend/extensions/indexing/putmask.cpp create mode 100644 dpnp/backend/extensions/indexing/putmask.hpp create mode 100644 dpnp/backend/extensions/indexing/putmask_kernel.hpp diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index a6691f31f559..b407aa30eb07 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -30,6 +30,7 @@ set(python_module_name _indexing_impl) set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/choose.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/putmask.cpp ${CMAKE_CURRENT_SOURCE_DIR}/indexing_py.cpp ) diff --git a/dpnp/backend/extensions/indexing/indexing_py.cpp b/dpnp/backend/extensions/indexing/indexing_py.cpp index a2d0b2efd512..4c4e6808b2dd 100644 --- a/dpnp/backend/extensions/indexing/indexing_py.cpp +++ b/dpnp/backend/extensions/indexing/indexing_py.cpp @@ -33,8 +33,10 @@ #include #include "choose.hpp" +#include "putmask.hpp" PYBIND11_MODULE(_indexing_impl, m) { dpnp::extensions::indexing::init_choose(m); + dpnp::extensions::indexing::init_putmask(m); } diff --git a/dpnp/backend/extensions/indexing/putmask.cpp b/dpnp/backend/extensions/indexing/putmask.cpp new file mode 100644 index 000000000000..153da7dbf548 --- /dev/null +++ b/dpnp/backend/extensions/indexing/putmask.cpp @@ -0,0 +1,223 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include +#include + +#include + +#include "dpctl4pybind11.hpp" +#include +#include + +#include "putmask_kernel.hpp" + +#include "../elementwise_functions/simplify_iteration_space.hpp" + +// dpctl tensor headers +#include "utils/offset_utils.hpp" +#include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +// utils extension headers +#include "ext/common.hpp" +#include "ext/validation_utils.hpp" + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::usm_ndarray; + +using ext::common::dtype_from_typenum; +using ext::validation::array_names; +using ext::validation::check_has_dtype; +using ext::validation::check_no_overlap; +using ext::validation::check_num_dims; +using ext::validation::check_queue; +using ext::validation::check_same_dtype; +using ext::validation::check_same_size; +using ext::validation::check_writable; + +namespace dpnp::extensions::indexing +{ +using ext::common::init_dispatch_vector; + +typedef sycl::event (*putmask_contig_fn_ptr_t)( + sycl::queue &, + const std::size_t, // nelems + char *, // dst + const char *, // mask + const char *, // values + const std::size_t, // values_size + const std::vector &); + +static putmask_contig_fn_ptr_t putmask_contig_dispatch_vector[td_ns::num_types]; + +std::pair + py_putmask(const usm_ndarray &dst, + const usm_ndarray &mask, + const usm_ndarray &values, + sycl::queue &exec_q, + const std::vector &depends = {}) +{ + array_names names = {{&dst, "dst"}, {&mask, "mask"}, {&values, "values"}}; + + check_same_dtype(&dst, &values, names); + check_has_dtype(&mask, td_ns::typenum_t::BOOL, names); + + check_same_size({&dst, &mask}, names); + const int nd = dst.get_ndim(); + check_num_dims(&mask, nd, names); + + check_queue({&dst, &mask, &values}, names, exec_q); + check_no_overlap({&mask, &values}, {&dst}, names); + check_writable({&dst}, names); + + // values must be 1D + check_num_dims(&values, 1, names); + + auto types = td_ns::usm_ndarray_types(); + // dst_typeid == values_typeid (check_same_dtype(&dst, &values, names)) + int dst_values_typeid = types.typenum_to_lookup_id(dst.get_typenum()); + + const py::ssize_t *dst_shape = dst.get_shape_raw(); + const py::ssize_t *mask_shape = mask.get_shape_raw(); + bool shapes_equal(true); + std::size_t nelems(1); + + for (int i = 0; i < std::max(nd, 1); ++i) { + const py::ssize_t d = (nd == 0 ? 1 : dst_shape[i]); + const py::ssize_t m = (nd == 0 ? 
1 : mask_shape[i]); + nelems *= static_cast(d); + shapes_equal = shapes_equal && (d == m); + } + if (!shapes_equal) { + throw py::value_error("`mask` and `dst` shapes must match"); + } + + // if nelems is zero, return + if (nelems == 0) { + return {sycl::event(), sycl::event()}; + } + + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, nelems); + + char *dst_p = dst.get_data(); + const char *mask_p = mask.get_data(); + const char *values_p = values.get_data(); + const std::size_t values_size = values.get_size(); + + // handle C contiguous inputs + const bool is_dst_c_contig = dst.is_c_contiguous(); + const bool is_mask_c_contig = mask.is_c_contiguous(); + const bool is_values_c_contig = values.is_c_contiguous(); + + const bool all_c_contig = + (is_dst_c_contig && is_mask_c_contig && is_values_c_contig); + + if (all_c_contig) { + auto contig_fn = putmask_contig_dispatch_vector[dst_values_typeid]; + + if (contig_fn == nullptr) { + py::dtype dst_values_dtype_py = + dtype_from_typenum(dst_values_typeid); + throw std::runtime_error( + "Contiguous implementation is missing for " + + std::string(py::str(dst_values_dtype_py)) + "data type"); + } + + auto comp_ev = contig_fn(exec_q, nelems, dst_p, mask_p, values_p, + values_size, depends); + sycl::event ht_ev = dpctl::utils::keep_args_alive( + exec_q, {dst, mask, values}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + + throw py::value_error("Stride implementation is not implemented yet"); +} + +/** + * @brief A factory to define pairs of supported types for which + * putmask function is available. + * + * @tparam T Type of input vector `dst` and `values` and of result vector `dst`. + */ +template +struct PutMaskOutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +struct PutMaskContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + return nullptr; + } + else { + return kernels::putmask_contig_impl; + } + } +}; + +static void populate_putmask_dispatch_vectors() +{ + init_dispatch_vector( + putmask_contig_dispatch_vector); +} + +void init_putmask(py::module_ m) +{ + populate_putmask_dispatch_vectors(); + + m.def("_putmask", &py_putmask, "", py::arg("dst"), py::arg("mask"), + py::arg("values"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); +} + +} // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/extensions/indexing/putmask.hpp b/dpnp/backend/extensions/indexing/putmask.hpp new file mode 100644 index 000000000000..2a66d55aa285 --- /dev/null +++ b/dpnp/backend/extensions/indexing/putmask.hpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::indexing +{ +void init_putmask(py::module_ m); +} // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/extensions/indexing/putmask_kernel.hpp b/dpnp/backend/extensions/indexing/putmask_kernel.hpp new file mode 100644 index 000000000000..ba73edd7580f --- /dev/null +++ b/dpnp/backend/extensions/indexing/putmask_kernel.hpp @@ -0,0 +1,257 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include +// dpctl tensor headers +#include "kernels/alignment.hpp" +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/sycl_complex.hpp" +#include "utils/offset_utils.hpp" +#include "utils/sycl_utils.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::indexing::kernels +{ +template +struct PutMaskContigFunctor +{ +private: + T *dst_ = nullptr; + const std::uint8_t *mask_u8_ = nullptr; + const T *values_ = nullptr; + std::size_t nelems_ = 0; + std::size_t val_size_ = 0; + +public: + PutMaskContigFunctor(T *dst, + const bool *mask, + const T *values, + std::size_t nelems, + std::size_t val_size) + : dst_(dst), mask_u8_(reinterpret_cast(mask)), + values_(values), nelems_(nelems), val_size_(val_size) + { + } + + void operator()(sycl::nd_item<1> ndit) const + { + if (val_size_ == 0 || nelems_ == 0) { + return; + } + + constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + /* Each work-item processes vec_sz elements, contiguous in memory */ + /* NOTE: work-group size must be divisible by sub-group size */ + + using dpctl::tensor::type_utils::is_complex_v; + if constexpr (enable_sg_loadstore && !is_complex_v) { + auto sg = ndit.get_sub_group(); + const std::uint32_t sgSize = sg.get_max_local_range()[0]; + const std::size_t lane_id = sg.get_local_id()[0]; + + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + const bool values_no_repeat = (val_size_ >= nelems_); + + if (base + elems_per_wi * sgSize <= nelems_) { + using dpctl::tensor::sycl_utils::sub_group_load; + using dpctl::tensor::sycl_utils::sub_group_store; + +#pragma unroll + for 
(std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + + auto dst_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&dst_[offset]); + auto mask_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&mask_u8_[offset]); + + const sycl::vec dst_vec = + sub_group_load(sg, dst_multi_ptr); + const sycl::vec mask_vec = + sub_group_load(sg, mask_multi_ptr); + + sycl::vec val_vec; + + if (values_no_repeat) { + auto values_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&values_[offset]); + + val_vec = sub_group_load(sg, values_multi_ptr); + } + else { + const std::size_t idx = offset + lane_id; +#pragma unroll + for (std::uint8_t k = 0; k < vec_sz; ++k) { + const std::size_t g = + idx + static_cast(k) * sgSize; + val_vec[k] = values_[g % val_size_]; + } + } + + sycl::vec out_vec; +#pragma unroll + for (std::uint8_t vec_id = 0; vec_id < vec_sz; ++vec_id) { + out_vec[vec_id] = + (mask_vec[vec_id] != static_cast(0)) + ? val_vec[vec_id] + : dst_vec[vec_id]; + } + + sub_group_store(sg, out_vec, dst_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + if (mask_u8_[k]) { + const std::size_t v = + values_no_repeat ? k : (k % val_size_); + dst_[k] = values_[v]; + } + } + } + } + else { + const std::size_t gid = ndit.get_global_linear_id(); + const std::size_t gws = ndit.get_global_range(0); + + const bool values_no_repeat = (val_size_ >= nelems_); + for (std::size_t offset = gid; offset < nelems_; offset += gws) { + if (mask_u8_[offset]) { + const std::size_t v = + values_no_repeat ? offset : (offset % val_size_); + dst_[offset] = values_[v]; + } + } + } + } +}; + +template +sycl::event putmask_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + char *dst_cp, + const char *mask_cp, + const char *values_cp, + std::size_t values_size, + const std::vector &depends = {}) +{ + T *dst_tp = reinterpret_cast(dst_cp); + const bool *mask_tp = reinterpret_cast(mask_cp); + const T *values_tp = reinterpret_cast(values_cp); + + constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + // const std::size_t n_work_items_needed = (nelems + elems_per_wi - 1) / + // elems_per_wi; + const std::size_t n_work_items_needed = nelems / elems_per_wi; + const std::size_t empirical_threshold = std::size_t(1) << 21; + const std::size_t lws = (n_work_items_needed <= empirical_threshold) + ? 
std::size_t(128) + : std::size_t(256); + + const std::size_t n_groups = + ((nelems + lws * elems_per_wi - 1) / (lws * elems_per_wi)); + const auto gws_range = sycl::range<1>(n_groups * lws); + const auto lws_range = sycl::range<1>(lws); + + using dpctl::tensor::kernels::alignment_utils::is_aligned; + using dpctl::tensor::kernels::alignment_utils::required_alignment; + + const bool aligned = is_aligned(dst_tp) && + is_aligned(mask_tp) && + is_aligned(values_tp); + + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + if (aligned) { + constexpr bool enable_sg = true; + using PutMaskFunc = + PutMaskContigFunctor; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + PutMaskFunc(dst_tp, mask_tp, values_tp, nelems, values_size)); + } + else { + constexpr bool enable_sg = false; + using PutMaskFunc = + PutMaskContigFunctor; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + PutMaskFunc(dst_tp, mask_tp, values_tp, nelems, values_size)); + } + }); + + return comp_ev; +} + +} // namespace dpnp::extensions::indexing::kernels diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index 6e7ab778299b..31f9a680a51f 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -40,6 +40,7 @@ """ # pylint: disable=protected-access +# pylint: disable=c-extension-no-member import operator from collections.abc import Iterable @@ -57,12 +58,8 @@ # pylint: disable=no-name-in-module import dpnp.backend.extensions.indexing._indexing_impl as indexing_ext -# pylint: disable=no-name-in-module -from .dpnp_algo import ( - dpnp_putmask, -) from .dpnp_array import dpnp_array -from .dpnp_utils import call_origin, get_usm_allocations +from .dpnp_utils import get_usm_allocations def _ravel_multi_index_checks(multi_index, dims, order): @@ -1804,30 +1801,84 @@ def put_along_axis(a, ind, values, axis, mode="wrap"): dpt.put_along_axis(usm_a, usm_ind, usm_vals, axis=axis, mode=mode) -def putmask(x1, mask, values): +def putmask(a, /, mask, values): """ Changes elements of an array based on conditional and input values. For full documentation refer to :obj:`numpy.putmask`. - Limitations - ----------- - Input arrays ``arr``, ``mask`` and ``values`` are supported - as :obj:`dpnp.ndarray`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Target array. + mask : {dpnp.ndarray, usm_ndarray} + Boolean mask array. It has to be the same shape as `a`. + values : {scalar, array_like} + Values to put into `a` where `mask` is True. + If `values` is smaller than `a`, then it will be repeated. + + See Also + -------- + :obj:`dpnp.place` : Change elements of an array based on conditional and + input values. + :obj:`dpnp.put` : Replaces specified elements of an array with given values. + :obj:`dpnp.take` : Take elements from an array along an axis. + :obj:`dpnp.copyto` : Copies values from one array to another, broadcasting + as necessary. 
+ + Examples + -------- + >>> import dpnp as np + >>> x = np.arange(6).reshape(2, 3) + >>> np.putmask(x, x>2, x**2) + >>> x + array([[ 0, 1, 2], + [ 9, 16, 25]]) + + If `values` is smaller than `a` it is repeated: + + >>> x = np.arange(5) + >>> np.putmask(x, x>1, np.array([-33, -44])) + >>> x + array([ 0, 1, -33, -44, -33]) """ - x1_desc = dpnp.get_dpnp_descriptor( - x1, copy_when_strides=False, copy_when_nondefault_queue=False - ) - mask_desc = dpnp.get_dpnp_descriptor(mask, copy_when_nondefault_queue=False) - values_desc = dpnp.get_dpnp_descriptor( - values, copy_when_nondefault_queue=False - ) - if x1_desc and mask_desc and values_desc: - return dpnp_putmask(x1_desc, mask_desc, values_desc) + dpnp.check_supported_arrays_type(a, mask) + dpnp.check_supported_arrays_type(values, scalar_type=True, all_scalars=True) + + if not a.shape == mask.shape: + raise ValueError("mask and data must be the same size") + + mask = dpnp.astype(mask, dpnp.bool, copy=False) + + if dpnp.isscalar(values): + a[mask] = values + + elif not dpnp.can_cast(values.dtype, a.dtype): + raise TypeError( + "Cannot cast array data from" + f" {values.dtype} to {a.dtype} according to the rule 'safe'" + ) + + elif a.shape == values.shape: + a[mask] = values[mask] + + else: + values = values.ravel(order="C") - return call_origin(numpy.putmask, x1, mask, values, dpnp_inplace=True) + if a.dtype != values.dtype: + values = dpnp.astype(values, a.dtype, casting="safe", copy=False) + + _, exec_q = get_usm_allocations([a, mask, values]) + + _manager = dpu.SequentialOrderManager[exec_q] + dep_evs = _manager.submitted_events + + h_ev, putmask_ev = indexing_ext._putmask( + a.get_array(), mask.get_array(), values.get_array(), exec_q, dep_evs + ) + _manager.add_event_pair(h_ev, putmask_ev) def ravel_multi_index(multi_index, dims, mode="raise", order="C"): From 367bdb4cc683ea1fd07dde0f4416ed211739f186 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 23 Jan 2026 04:20:12 -0800 Subject: [PATCH 3/5] Unskip TestPutmaskDifferentDtypes in cupy tests --- dpnp/tests/third_party/cupy/indexing_tests/test_insert.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dpnp/tests/third_party/cupy/indexing_tests/test_insert.py b/dpnp/tests/third_party/cupy/indexing_tests/test_insert.py index 7399343e7e57..29d5238b0315 100644 --- a/dpnp/tests/third_party/cupy/indexing_tests/test_insert.py +++ b/dpnp/tests/third_party/cupy/indexing_tests/test_insert.py @@ -231,7 +231,6 @@ def test_putmask_int_mask_scalar_values(self, xp): class TestPutmaskDifferentDtypes(unittest.TestCase): - @pytest.mark.skip("putmask() is not fully supported") @testing.for_all_dtypes_combination(names=["a_dtype", "val_dtype"]) def test_putmask_differnt_dtypes_raises(self, a_dtype, val_dtype): shape = (2, 3) From 764a9e58729028ff4a06597f4910e67e377ac589 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 23 Jan 2026 05:38:48 -0800 Subject: [PATCH 4/5] Add TestPutmask --- dpnp/tests/test_indexing.py | 268 +++++++++++++++++++++--------------- 1 file changed, 155 insertions(+), 113 deletions(-) diff --git a/dpnp/tests/test_indexing.py b/dpnp/tests/test_indexing.py index 9a55efe138b7..b01ba9abfffb 100644 --- a/dpnp/tests/test_indexing.py +++ b/dpnp/tests/test_indexing.py @@ -17,6 +17,12 @@ import dpnp from dpnp.dpnp_array import dpnp_array +from dpnp.tests.helper import ( + assert_array_equal, + assert_dtype_allclose, + generate_random_numpy_array, + get_all_dtypes, +) from .helper import ( get_abs_array, @@ -954,126 +960,162 @@ def test_indices(dimension, dtype, 
sparse): assert_array_equal(Xnp, X) -@pytest.mark.parametrize( - "mask", - [ - [[True, False], [False, True]], - [[False, True], [True, False]], - [[False, False], [True, True]], - ], - ids=[ - "[[True, False], [False, True]]", - "[[False, True], [True, False]]", - "[[False, False], [True, True]]", - ], -) -@pytest.mark.parametrize( - "arr", - [[[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]]], - ids=["[[0, 0], [0, 0]]", "[[1, 2], [1, 2]]", "[[1, 2], [3, 4]]"], -) -def test_putmask1(arr, mask): - a = numpy.array(arr) - ia = dpnp.array(a) - m = numpy.array(mask) - im = dpnp.array(m) - v = numpy.array([100, 200]) - iv = dpnp.array(v) - numpy.putmask(a, m, v) - dpnp.putmask(ia, im, iv) - assert_array_equal(a, ia) +class TestPutmask: + @pytest.mark.parametrize( + "shape", + [ + (1,), + (5,), + (4, 3), + (3, 3), + (5, 3), + (3, 4, 5), + ], + ) + @pytest.mark.parametrize( + "dt", get_all_dtypes(no_bool=True, no_float16=False) + ) + @pytest.mark.parametrize("order", ["C", "F"]) + def test_putmask_scalar_values(self, shape, dt, order): + a_np = generate_random_numpy_array(shape, order=order, dtype=dt) + mask_np = a_np > 0 + val = numpy.array(7, dtype=dt).item() + a_dp = dpnp.array(a_np) + mask_dp = dpnp.array(mask_np) -@pytest.mark.parametrize( - "vals", - [ - [100, 200], - [100, 200, 300, 400, 500, 600], - [100, 200, 300, 400, 500, 600, 800, 900], - ], - ids=[ - "[100, 200]", - "[100, 200, 300, 400, 500, 600]", - "[100, 200, 300, 400, 500, 600, 800, 900]", - ], -) -@pytest.mark.parametrize( - "mask", - [ + dpnp.putmask(a_dp, mask_dp, val) + numpy.putmask(a_np, mask_np, val) + + assert_dtype_allclose(a_dp, a_np) + + @pytest.mark.parametrize( + "shape", [ - [[True, False], [False, True]], - [[False, True], [True, False]], - [[False, False], [True, True]], - ] - ], - ids=[ - "[[[True, False], [False, True]], [[False, True], [True, False]], [[False, False], [True, True]]]" - ], -) -@pytest.mark.parametrize( - "arr", - [[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]], - ids=["[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]"], -) -def test_putmask2(arr, mask, vals): - a = numpy.array(arr) - ia = dpnp.array(a) - m = numpy.array(mask) - im = dpnp.array(m) - v = numpy.array(vals) - iv = dpnp.array(v) - numpy.putmask(a, m, v) - dpnp.putmask(ia, im, iv) - assert_array_equal(a, ia) + (1,), + (5,), + (4, 3), + (3, 3), + (5, 3), + (3, 4, 5), + ], + ) + @pytest.mark.parametrize( + "dt", get_all_dtypes(no_bool=True, no_float16=False) + ) + @pytest.mark.parametrize("order", ["C", "F"]) + def test_putmask_same_shape(self, shape, dt, order): + a_np = generate_random_numpy_array(shape, dtype=dt, order=order) + mask_np = a_np > 0 + val_np = generate_random_numpy_array(shape, dtype=dt, order=order) + a_dp = dpnp.array(a_np, order=order) + mask_dp = dpnp.array(mask_np, order=order) + val_dp = dpnp.array(val_np, order=order) -@pytest.mark.parametrize( - "vals", - [ - [100, 200], - [100, 200, 300, 400, 500, 600], - [100, 200, 300, 400, 500, 600, 800, 900], - ], - ids=[ - "[100, 200]", - "[100, 200, 300, 400, 500, 600]", - "[100, 200, 300, 400, 500, 600, 800, 900]", - ], -) -@pytest.mark.parametrize( - "mask", - [ + dpnp.putmask(a_dp, mask_dp, val_dp) + numpy.putmask(a_np, mask_np, val_np) + + assert_dtype_allclose(a_dp, a_np) + + @pytest.mark.parametrize( + "a_shape,val_shape", [ - [[[False, False], [True, True]], [[True, True], [True, True]]], - [[[False, False], [True, True]], [[False, False], [False, False]]], - ] - ], - ids=[ - "[[[[False, False], [True, True]], [[True, True], [True, True]]], 
[[[False, False], [True, True]], [[False, False], [False, False]]]]" - ], -) -@pytest.mark.parametrize( - "arr", - [ + ((6,), (3,)), + ((6,), (7,)), + ((2, 3), (5,)), + ((6, 3), (5, 4)), + ((4, 3, 5), (8,)), + ((2, 4, 3), (5, 5, 2)), + ], + ) + @pytest.mark.parametrize( + "dt", get_all_dtypes(no_bool=True, no_float16=False) + ) + @pytest.mark.parametrize("order", ["C"]) # need to add "F" + def test_putmask_kernel(self, a_shape, val_shape, dt, order): + a_np = generate_random_numpy_array(a_shape, dtype=dt, order=order) + mask_np = a_np > 0 + val_np = generate_random_numpy_array(val_shape, dtype=dt, order=order) + + a_dp = dpnp.array(a_np, order=order) + mask_dp = dpnp.array(mask_np, order=order) + val_dp = dpnp.array(val_np, order=order) + + dpnp.putmask(a_dp, mask_dp, val_dp) + numpy.putmask(a_np, mask_np, val_np) + + assert_dtype_allclose(a_dp, a_np) + + # test_putmask_strided + + def test_putmask_mask_cast_to_bool(self): + a_np = generate_random_numpy_array((5, 5), dtype="f4") + mask_np = generate_random_numpy_array((5, 5), dtype="int64") + val_np = generate_random_numpy_array((7,), dtype="f4") + + a_dp = dpnp.array(a_np) + mask_dp = dpnp.array(mask_np) + val_dp = dpnp.array(val_np) + + dpnp.putmask(a_dp, mask_dp, val_dp) + numpy.putmask(a_np, mask_np, val_np) + + assert_dtype_allclose(a_dp, a_np) + + @pytest.mark.parametrize( + "dt", get_all_dtypes(no_bool=True, no_float16=False) + ) + @pytest.mark.parametrize("order", ["C", "F"]) + @pytest.mark.parametrize( + "shape", [ - [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], - [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], - ] - ], - ids=[ - "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]" - ], -) -def test_putmask3(arr, mask, vals): - a = numpy.array(arr) - ia = dpnp.array(a) - m = numpy.array(mask) - im = dpnp.array(m) - v = numpy.array(vals) - iv = dpnp.array(v) - numpy.putmask(a, m, v) - dpnp.putmask(ia, im, iv) - assert_array_equal(a, ia) + (0,), + (0, 3), + (2, 0), + (0, 2, 3), + ], + ) + @pytest.mark.parametrize( + "values_case", ["scalar", "same_shape", "diff_shape"] + ) + def test_putmask_empty(self, dt, order, shape, values_case): + a_np = numpy.empty(shape, dtype=dt, order=order) + mask_np = numpy.empty(shape, dtype=numpy.bool_, order=order) + + if values_case == "scalar": + val_np = numpy.asarray(1, dtype=dt).item() + val_dp = val_np + elif values_case == "same_shape": + val_np = numpy.empty(shape, dtype=dt, order=order) + val_dp = dpnp.array(val_np, order=order) + else: + # different shape + val_np = numpy.array([1, 2], dtype=dt) + val_dp = dpnp.array(val_np) + + a_dp = dpnp.array(a_np, order=order) + mask_dp = dpnp.array(mask_np, order=order) + + dpnp.putmask(a_dp, mask_dp, val_dp) + numpy.putmask(a_np, mask_np, val_np) + + assert_dtype_allclose(a_dp, a_np) + + def test_putmask_errors(self): + # shape mask mismatch + a = dpnp.arange(6).reshape(2, 3) + mask_bad = dpnp.ones((3, 2), dtype=dpnp.bool) + assert_raises(ValueError, dpnp.putmask, a, mask_bad, 1) + + # safe-cast error + a = dpnp.arange(10, dtype=dpnp.int32) + mask = a > 3 + val_f = dpnp.array([1.5, 2.5], dtype="f4") + assert_raises(TypeError, dpnp.putmask, a, mask, val_f) + + # values as list + assert_raises(TypeError, dpnp.putmask, a, mask, [1, 2, 3]) @pytest.mark.parametrize("m", [None, 0, 1, 2, 3, 4]) From 13a7adb4b0d08db87ba4a923b9244354f1478d05 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 23 Jan 2026 07:20:25 -0800 Subject: [PATCH 5/5] Add sycl queue validation for mask and values --- dpnp/dpnp_iface_indexing.py | 13 +++++++++++-- 
dpnp/tests/test_indexing.py | 9 +++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index 31f9a680a51f..b252d252245c 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -1870,8 +1870,17 @@ def putmask(a, /, mask, values): if a.dtype != values.dtype: values = dpnp.astype(values, a.dtype, casting="safe", copy=False) - _, exec_q = get_usm_allocations([a, mask, values]) - + exec_q = a.sycl_queue + if ( + dpu.get_execution_queue( + [exec_q, mask.sycl_queue, values.sycl_queue] + ) + is None + ): + raise ValueError( + "`mask` and `values` must be allocated on " + "the same SYCL queue as `a`" + ) _manager = dpu.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events diff --git a/dpnp/tests/test_indexing.py b/dpnp/tests/test_indexing.py index b01ba9abfffb..822bd03d7a31 100644 --- a/dpnp/tests/test_indexing.py +++ b/dpnp/tests/test_indexing.py @@ -1117,6 +1117,15 @@ def test_putmask_errors(self): # values as list assert_raises(TypeError, dpnp.putmask, a, mask, [1, 2, 3]) + # values has a different SYCL queue + q1 = dpctl.SyclQueue() + q2 = dpctl.SyclQueue() + a = dpnp.arange(10, sycl_queue=q1) + mask = a > 3 + val = dpnp.arange(5, sycl_queue=q2) + if q1 != q2: + assert_raises(ValueError, dpnp.putmask, a, mask, val) + @pytest.mark.parametrize("m", [None, 0, 1, 2, 3, 4]) @pytest.mark.parametrize("k", [-3, -2, -1, 0, 1, 2, 3])
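
A minimal usage sketch of the error handling exercised by TestPutmask.test_putmask_errors above
(assuming a dpnp build that includes these patches and at least one SYCL device; the exact
exception messages may differ):

    >>> import dpnp as np
    >>> a = np.arange(10, dtype=np.int32)
    >>> mask = a > 3
    >>> np.putmask(a, np.ones((2, 5), dtype=np.bool), 0)        # mask shape differs from a
    Traceback (most recent call last):
        ...
    ValueError: mask and data must be the same size
    >>> np.putmask(a, mask, np.array([1.5, 2.5], dtype="f4"))   # float32 -> int32 is not a safe cast
    Traceback (most recent call last):
        ...
    TypeError: Cannot cast array data from float32 to int32 according to the rule 'safe'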